-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added analyze scripts, also temperature to log file
- Loading branch information
Showing
6 changed files
with
569 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
""" | ||
analyze_log.py | ||
Read and parse a RAMD log file. Construct a series of milestoning models from | ||
the trajectory fragments for a set of bins of RAMD force vectors. The | ||
information from the milestoning models will be used in a sigma-RAMD model | ||
that will allow one to expand the residence time in a Taylor series expansion | ||
according to sigma-RAMD theory. | ||
""" | ||
|
||
import sys | ||
import argparse | ||
|
||
import numpy as np | ||
|
||
import parser | ||
import milestoning | ||
import sigma_ramd | ||
|
||
def analyze_log(ramd_log_file_list, num_milestones=10, num_xi_bins=5, | ||
verbose=True): | ||
if verbose: | ||
print(f"Using {num_milestones} milestones.") | ||
print(f"Using {num_xi_bins} angle bins.") | ||
xi_bins_ranges = np.linspace(-1.0, 1.0, num_xi_bins+1) | ||
trajectory_list = [] | ||
forceOutFreq = None | ||
forceRAMD = None | ||
timeStep = None | ||
for ramd_log_filename in ramd_log_file_list: | ||
trajectories, forceOutFreq_, forceRAMD_, timeStep_ \ | ||
= parser.parse_ramd_log_file(ramd_log_filename) | ||
if forceOutFreq is None: | ||
forceOutFreq = forceOutFreq_ | ||
forceRAMD = forceRAMD_ | ||
timeStep = timeStep_ | ||
else: | ||
assert forceOutFreq == forceOutFreq_, "RAMD logs contain differing forceOutFreq values." | ||
assert forceRAMD == forceRAMD_, "RAMD logs contain differing forceRAMD values." | ||
assert timeStep == timeStep_, "RAMD logs contain differing timeStep values." | ||
|
||
trajectory_list += trajectories | ||
|
||
if verbose: | ||
print("Number of trajectories extracted from log file(s):", | ||
len(trajectory_list)) | ||
print("forceOutFreq:", forceOutFreq) | ||
print("forceRAMD:", forceRAMD, "kcal/(mole * Angstrom)") | ||
print("timeStep:", timeStep, "ps") | ||
|
||
# Each trajectory represents a set of frames between force changes | ||
# Align the trajectories and convert to 1D, and xi values | ||
one_dim_trajs = parser.condense_trajectories(trajectories) | ||
|
||
#Construct a milestoning model based on trajectory fragments | ||
milestones, min_location, max_location, starting_cv_val \ | ||
= milestoning.uniform_milestone_locations(one_dim_trajs, num_milestones) | ||
|
||
xi_bins = [] | ||
for j in range(num_xi_bins): | ||
xi_bins.append([]) | ||
|
||
for i, trajectory in enumerate(one_dim_trajs): | ||
traj_xi = trajectory[0][1] | ||
found_bin = False | ||
for j in range(num_xi_bins): | ||
if (traj_xi >= xi_bins_ranges[j]) \ | ||
and (traj_xi <= xi_bins_ranges[j+1]): | ||
xi_bins[j].append(trajectory) | ||
found_bin = True | ||
break | ||
|
||
if not found_bin: | ||
raise Exception(f"xi value {traj_xi} not placed in bin.") | ||
|
||
frame_time = timeStep * forceOutFreq | ||
xi_bin_time_profiles = np.zeros((num_xi_bins, num_milestones, num_milestones)) | ||
for i in range(num_xi_bins): | ||
if verbose: | ||
print(f"num in bin {i}: {len(xi_bins[i])}") | ||
xi_bin_trajs = xi_bins[i] | ||
count_matrix, time_vector, rate_matrix, transition_matrix \ | ||
= milestoning.make_milestoning_model(xi_bin_trajs, milestones, frame_time, num_milestones) | ||
|
||
# use the milestoning model to construct time profiles | ||
|
||
time_profile_matrix = milestoning.make_time_profiles(transition_matrix, time_vector, num_milestones) | ||
xi_bin_time_profiles[i,:,:] = time_profile_matrix[:,:] | ||
|
||
# Use sigma-RAMD to analyze | ||
beta=0.0 # TODO: fill out | ||
calc = sigma_ramd.functional_expansion_1d_ramd( | ||
xi_bin_time_profiles, force_constant=forceRAMD, beta=beta, | ||
min_cv_val=min_location, max_cv_val=max_location, | ||
starting_cv_val=starting_cv_val) | ||
time_estimate = calc.make_second_order_time_estimate() | ||
print("time_estimate:", time_estimate) | ||
|
||
if __name__ == "__main__": | ||
argparser = argparse.ArgumentParser(description=__doc__) | ||
argparser.add_argument( | ||
"ramdLogFiles", metavar="RAMDLOGFILES", type=str, nargs="+", | ||
help="The RAMD log files to open and parse.") | ||
argparser.add_argument( | ||
"-n", "--numMilestones", dest="numMilestones", default=10, | ||
help="The number of milestones to use in the milestoning "\ | ||
"calculation.", type=int) | ||
argparser.add_argument( | ||
"-a", "--numAngleBins", dest="numAngleBins", default=5, | ||
help="The number of bins to divide the force vectors into.", type=int) | ||
|
||
args = argparser.parse_args() | ||
args = vars(args) | ||
ramdLogFiles = args["ramdLogFiles"] | ||
numMilestones = args["numMilestones"] | ||
numAngleBins = args["numAngleBins"] | ||
|
||
analyze_log(ramdLogFiles, numMilestones, numAngleBins) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
""" | ||
openmm_ramd/analyze/milestoning.py | ||
Prepare and utilize a milestoning model for RAMD simulation to use | ||
in a sigma-RAMD analysis. | ||
""" | ||
|
||
import numpy as np | ||
|
||
def uniform_milestone_locations(one_dim_trajs, num_milestones): | ||
""" | ||
Define a set of milestone locations uniformly spread. | ||
""" | ||
# First, locate the minimum and maximum positions along the CV | ||
min_location = 9e9 | ||
max_location = -9e9 | ||
starting_cv_val = None | ||
for i, trajectory in enumerate(one_dim_trajs): | ||
for j, frame in enumerate(trajectory): | ||
[cv_val, xi_val] = frame | ||
if i == 0 and j == 0: | ||
starting_cv_val = cv_val | ||
if cv_val < min_location: | ||
min_location = cv_val | ||
if cv_val > max_location: | ||
max_location = cv_val | ||
|
||
assert min_location < max_location, \ | ||
"The minimum must be less than the maximum." | ||
|
||
assert starting_cv_val is not None, "No starting CV value found." | ||
|
||
span = max_location - min_location | ||
h = span / (num_milestones + 1) | ||
milestones = [] | ||
for i in range(num_milestones): | ||
milestone = h*(i+1) | ||
milestones.append(milestone) | ||
|
||
return milestones, min_location, max_location, starting_cv_val | ||
|
||
def make_milestoning_model( | ||
xi_bin_trajs, milestones, frame_time, num_milestones): | ||
count_matrix = np.zeros((num_milestones, num_milestones), dtype=np.int32) | ||
time_vector = np.zeros((num_milestones, 1)) | ||
|
||
for i, traj in enumerate(xi_bin_trajs): | ||
src_milestone = None | ||
src_time = 0.0 | ||
new_cell_id = None | ||
src_time = -frame_time | ||
for j, frame in enumerate(traj): | ||
[cv_val, xi_val] = frame | ||
# Find what cell it's in | ||
old_cell_id = new_cell_id | ||
new_cell_id = num_milestones | ||
src_time += frame_time | ||
for k, milestone in enumerate(milestones): | ||
if cv_val < milestone: | ||
new_cell_id = k | ||
break | ||
if (old_cell_id is not None) and (new_cell_id != old_cell_id): | ||
# then a milestone transition has happened | ||
diff = new_cell_id - old_cell_id | ||
dest_milestone = new_cell_id - (diff + 1) // 2 | ||
if src_milestone != dest_milestone: | ||
if src_milestone is not None: | ||
assert src_milestone != dest_milestone | ||
count_matrix[dest_milestone, src_milestone] += 1 | ||
time_vector[src_milestone, 0] += src_time | ||
|
||
src_milestone = dest_milestone | ||
src_time = 0.0 | ||
|
||
#print("count_matrix:", count_matrix) | ||
#print("time_vector:", time_vector) | ||
|
||
transition_matrix = np.zeros((num_milestones, num_milestones)) | ||
for i in range(num_milestones): # column index | ||
column_sum = np.sum(count_matrix[:,i]) | ||
for j in range(num_milestones): # row index | ||
if column_sum == 0.0: | ||
transition_matrix[j,i] = 0.0 | ||
else: | ||
transition_matrix[j,i] = count_matrix[j,i] / column_sum | ||
|
||
rate_matrix = np.zeros((num_milestones, num_milestones)) | ||
for i in range(num_milestones): | ||
for j in range(num_milestones): | ||
if time_vector[j,0] == 0.0: | ||
rate_matrix[i,j] = 0.0 | ||
else: | ||
rate_matrix[i,j] = count_matrix[i,j] / time_vector[j,0] | ||
|
||
#print("rate_matrix:", rate_matrix) | ||
|
||
return count_matrix, time_vector, transition_matrix, rate_matrix | ||
|
||
def make_time_profiles(transition_matrix, time_vector, num_milestones): | ||
transit_time_matrix = np.zeros((num_milestones, num_milestones)) | ||
identity = np.identity(num_milestones) | ||
for i in range(num_milestones): | ||
transition_matrix_with_sink = np.copy(transition_matrix[:,:]) | ||
transition_matrix_with_sink[:,i] = 0.0 | ||
geom_series = np.linalg.inv(identity-transition_matrix_with_sink) | ||
result = time_vector.T @ geom_series | ||
transit_time_matrix[i,:] = result[0,:] | ||
|
||
return transit_time_matrix |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
""" | ||
parser.py | ||
Parse RAMD log files, and use the log files to construct trajectories of | ||
CVs. | ||
""" | ||
|
||
import re | ||
import os | ||
|
||
import numpy as np | ||
|
||
def parse_ramd_log_file(ramd_log_filename): | ||
assert os.path.exists(ramd_log_filename), \ | ||
f"File not found: {ramd_log_filename}." | ||
trajectories = [] | ||
trajectory = [] | ||
n_frames = 0 | ||
relative_COM_x = None | ||
relative_COM_y = None | ||
relative_COM_z = None | ||
ligand_step = None | ||
ligand_COM_x = None | ||
ligand_COM_y = None | ||
ligand_COM_z = None | ||
with open(ramd_log_filename, "r") as f: | ||
for i, line in enumerate(f.readlines()): | ||
new_trajectory_frame = False | ||
new_trajectory = False | ||
# Determine the log output frequency | ||
forceOutFreq_search = re.match("RAMD: forceOutFreq * (\d+)", line) | ||
if forceOutFreq_search: | ||
forceOutFreq = int(forceOutFreq_search.group(1)) | ||
|
||
forceRAMD_search = re.match("RAMD: forceRAMD * (\d+\.\d*)", line) | ||
if forceRAMD_search: | ||
forceRAMD = float(forceRAMD_search.group(1)) | ||
|
||
timeStep_search = re.match("RAMD: timeStep * (\d+\.\d*)", line) | ||
if timeStep_search: | ||
timeStep = float(timeStep_search.group(1)) | ||
|
||
# Find the lines that indicate the ligand COM | ||
lig_com_search = re.match("RAMD FORCE: (\d+) > LIGAND COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) | ||
if lig_com_search: | ||
old_ligand_step = ligand_step | ||
ligand_step = int(lig_com_search.group(1)) | ||
old_ligand_COM_x = ligand_COM_x | ||
old_ligand_COM_y = ligand_COM_y | ||
old_ligand_COM_z = ligand_COM_z | ||
ligand_COM_x = float(lig_com_search.group(2)) | ||
ligand_COM_y = float(lig_com_search.group(3)) | ||
ligand_COM_z = float(lig_com_search.group(4)) | ||
new_trajectory_frame = True | ||
|
||
rec_com_search = re.match("RAMD FORCE: (\d+) > PROTEIN COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) | ||
if rec_com_search: | ||
receptor_step = int(rec_com_search.group(1)) | ||
receptor_COM_x = float(rec_com_search.group(2)) | ||
receptor_COM_y = float(rec_com_search.group(3)) | ||
receptor_COM_z = float(rec_com_search.group(4)) | ||
relative_COM_x = ligand_COM_x - receptor_COM_x | ||
relative_COM_y = ligand_COM_y - receptor_COM_y | ||
relative_COM_z = ligand_COM_z - receptor_COM_z | ||
continue | ||
|
||
accel_search = re.match("RAMD: (\d+) >>> KEEP PREVIOUS ACCELERATION DIRECTION: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) | ||
if accel_search: | ||
accel_step = int(accel_search.group(1)) | ||
accel_COM_x = float(accel_search.group(2)) | ||
accel_COM_y = float(accel_search.group(3)) | ||
accel_COM_z = float(accel_search.group(4)) | ||
continue | ||
|
||
change_accel_search = re.match("RAMD: (\d+) >>> CHANGE ACCELERATION DIRECTION TO: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) | ||
if change_accel_search: | ||
accel_step = int(change_accel_search.group(1)) | ||
#accel_COM_x = float(change_accel_search.group(2)) | ||
#accel_COM_y = float(change_accel_search.group(3)) | ||
#accel_COM_z = float(change_accel_search.group(4)) | ||
old_ligand_step = ligand_step | ||
new_trajectory_frame = True | ||
new_trajectory = True | ||
|
||
if new_trajectory_frame: | ||
if n_frames > 0: | ||
if relative_COM_x is not None: | ||
assert old_ligand_step == receptor_step, \ | ||
"The lines defining receptor and ligand COM are not the same RAMD step." | ||
frame = [relative_COM_x, relative_COM_y, relative_COM_z, | ||
accel_COM_x, accel_COM_y, accel_COM_z] | ||
else: | ||
frame = [old_ligand_COM_x, old_ligand_COM_y, old_ligand_COM_z, | ||
accel_COM_x, accel_COM_y, accel_COM_z] | ||
|
||
assert old_ligand_step == accel_step, \ | ||
"The lines defining ligand COM and accel vector are not the same RAMD step." | ||
|
||
trajectory.append(frame) | ||
|
||
n_frames += 1 | ||
|
||
if new_trajectory: | ||
|
||
trajectories.append(trajectory) | ||
trajectory = [] | ||
n_frames = 0 | ||
|
||
return trajectories, forceOutFreq, forceRAMD, timeStep | ||
|
||
def condense_trajectories(trajectories): | ||
""" | ||
Reduce trajectories to 1D, and compute xi values | ||
""" | ||
# First, find the average position of all ligand positions, this will | ||
# define our 1D CV | ||
sum_ligx = 0.0 | ||
sum_ligy = 0.0 | ||
sum_ligz = 0.0 | ||
total_points = 0 | ||
|
||
for i, trajectory in enumerate(trajectories): | ||
start_accx = None | ||
for j, frame in enumerate(trajectory): | ||
[ligx, ligy, ligz, accx, accy, accz] = frame | ||
sum_ligx += ligx | ||
sum_ligy += ligy | ||
sum_ligz += ligz | ||
total_points += 1 | ||
if start_accx is None: | ||
start_accx = accx | ||
else: | ||
assert start_accx == start_accx, \ | ||
"Every frame must have the same acceleration vector" | ||
|
||
avg_ligx = sum_ligx / total_points | ||
avg_ligy = sum_ligy / total_points | ||
avg_ligz = sum_ligz / total_points | ||
avg_lig_length = np.sqrt(avg_ligx**2 + avg_ligy**2 + avg_ligz**2) | ||
norm_avgx = avg_ligx / avg_lig_length | ||
norm_avgy = avg_ligy / avg_lig_length | ||
norm_avgz = avg_ligz / avg_lig_length | ||
|
||
one_dim_trajs = [] | ||
for i, trajectory in enumerate(trajectories): | ||
one_dim_traj = [] | ||
for j, frame in enumerate(trajectory): | ||
[ligx, ligy, ligz, accx, accy, accz] = frame | ||
# dot product between frame pos and avg_lig | ||
cv_val = (ligx*norm_avgx + ligy*norm_avgy + ligz*norm_avgz) | ||
# dot product between frame acc and avg_lig | ||
xi_val = (accx*norm_avgx + accy*norm_avgy + accz*norm_avgz) | ||
one_dim_traj.append([cv_val, xi_val]) | ||
|
||
one_dim_trajs.append(one_dim_traj) | ||
|
||
return one_dim_trajs |
Oops, something went wrong.