Skip to content

Commit

Permalink
Added analyze scripts, also temperature to log file
Browse files Browse the repository at this point in the history
  • Loading branch information
lvotapka committed Jun 16, 2023
1 parent c460df6 commit eff682f
Show file tree
Hide file tree
Showing 6 changed files with 569 additions and 5 deletions.
118 changes: 118 additions & 0 deletions openmm_ramd/analyze/analyze_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""
analyze_log.py
Read and parse a RAMD log file. Construct a series of milestoning models from
the trajectory fragments for a set of bins of RAMD force vectors. The
information from the milestoning models will be used in a sigma-RAMD model
that will allow one to expand the residence time in a Taylor series expansion
according to sigma-RAMD theory.
"""

import sys
import argparse

import numpy as np

import parser
import milestoning
import sigma_ramd

def analyze_log(ramd_log_file_list, num_milestones=10, num_xi_bins=5,
verbose=True):
if verbose:
print(f"Using {num_milestones} milestones.")
print(f"Using {num_xi_bins} angle bins.")
xi_bins_ranges = np.linspace(-1.0, 1.0, num_xi_bins+1)
trajectory_list = []
forceOutFreq = None
forceRAMD = None
timeStep = None
for ramd_log_filename in ramd_log_file_list:
trajectories, forceOutFreq_, forceRAMD_, timeStep_ \
= parser.parse_ramd_log_file(ramd_log_filename)
if forceOutFreq is None:
forceOutFreq = forceOutFreq_
forceRAMD = forceRAMD_
timeStep = timeStep_
else:
assert forceOutFreq == forceOutFreq_, "RAMD logs contain differing forceOutFreq values."
assert forceRAMD == forceRAMD_, "RAMD logs contain differing forceRAMD values."
assert timeStep == timeStep_, "RAMD logs contain differing timeStep values."

trajectory_list += trajectories

if verbose:
print("Number of trajectories extracted from log file(s):",
len(trajectory_list))
print("forceOutFreq:", forceOutFreq)
print("forceRAMD:", forceRAMD, "kcal/(mole * Angstrom)")
print("timeStep:", timeStep, "ps")

# Each trajectory represents a set of frames between force changes
# Align the trajectories and convert to 1D, and xi values
one_dim_trajs = parser.condense_trajectories(trajectories)

#Construct a milestoning model based on trajectory fragments
milestones, min_location, max_location, starting_cv_val \
= milestoning.uniform_milestone_locations(one_dim_trajs, num_milestones)

xi_bins = []
for j in range(num_xi_bins):
xi_bins.append([])

for i, trajectory in enumerate(one_dim_trajs):
traj_xi = trajectory[0][1]
found_bin = False
for j in range(num_xi_bins):
if (traj_xi >= xi_bins_ranges[j]) \
and (traj_xi <= xi_bins_ranges[j+1]):
xi_bins[j].append(trajectory)
found_bin = True
break

if not found_bin:
raise Exception(f"xi value {traj_xi} not placed in bin.")

frame_time = timeStep * forceOutFreq
xi_bin_time_profiles = np.zeros((num_xi_bins, num_milestones, num_milestones))
for i in range(num_xi_bins):
if verbose:
print(f"num in bin {i}: {len(xi_bins[i])}")
xi_bin_trajs = xi_bins[i]
count_matrix, time_vector, rate_matrix, transition_matrix \
= milestoning.make_milestoning_model(xi_bin_trajs, milestones, frame_time, num_milestones)

# use the milestoning model to construct time profiles

time_profile_matrix = milestoning.make_time_profiles(transition_matrix, time_vector, num_milestones)
xi_bin_time_profiles[i,:,:] = time_profile_matrix[:,:]

# Use sigma-RAMD to analyze
beta=0.0 # TODO: fill out
calc = sigma_ramd.functional_expansion_1d_ramd(
xi_bin_time_profiles, force_constant=forceRAMD, beta=beta,
min_cv_val=min_location, max_cv_val=max_location,
starting_cv_val=starting_cv_val)
time_estimate = calc.make_second_order_time_estimate()
print("time_estimate:", time_estimate)

if __name__ == "__main__":
argparser = argparse.ArgumentParser(description=__doc__)
argparser.add_argument(
"ramdLogFiles", metavar="RAMDLOGFILES", type=str, nargs="+",
help="The RAMD log files to open and parse.")
argparser.add_argument(
"-n", "--numMilestones", dest="numMilestones", default=10,
help="The number of milestones to use in the milestoning "\
"calculation.", type=int)
argparser.add_argument(
"-a", "--numAngleBins", dest="numAngleBins", default=5,
help="The number of bins to divide the force vectors into.", type=int)

args = argparser.parse_args()
args = vars(args)
ramdLogFiles = args["ramdLogFiles"]
numMilestones = args["numMilestones"]
numAngleBins = args["numAngleBins"]

analyze_log(ramdLogFiles, numMilestones, numAngleBins)
109 changes: 109 additions & 0 deletions openmm_ramd/analyze/milestoning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
openmm_ramd/analyze/milestoning.py
Prepare and utilize a milestoning model for RAMD simulation to use
in a sigma-RAMD analysis.
"""

import numpy as np

def uniform_milestone_locations(one_dim_trajs, num_milestones):
"""
Define a set of milestone locations uniformly spread.
"""
# First, locate the minimum and maximum positions along the CV
min_location = 9e9
max_location = -9e9
starting_cv_val = None
for i, trajectory in enumerate(one_dim_trajs):
for j, frame in enumerate(trajectory):
[cv_val, xi_val] = frame
if i == 0 and j == 0:
starting_cv_val = cv_val
if cv_val < min_location:
min_location = cv_val
if cv_val > max_location:
max_location = cv_val

assert min_location < max_location, \
"The minimum must be less than the maximum."

assert starting_cv_val is not None, "No starting CV value found."

span = max_location - min_location
h = span / (num_milestones + 1)
milestones = []
for i in range(num_milestones):
milestone = h*(i+1)
milestones.append(milestone)

return milestones, min_location, max_location, starting_cv_val

def make_milestoning_model(
xi_bin_trajs, milestones, frame_time, num_milestones):
count_matrix = np.zeros((num_milestones, num_milestones), dtype=np.int32)
time_vector = np.zeros((num_milestones, 1))

for i, traj in enumerate(xi_bin_trajs):
src_milestone = None
src_time = 0.0
new_cell_id = None
src_time = -frame_time
for j, frame in enumerate(traj):
[cv_val, xi_val] = frame
# Find what cell it's in
old_cell_id = new_cell_id
new_cell_id = num_milestones
src_time += frame_time
for k, milestone in enumerate(milestones):
if cv_val < milestone:
new_cell_id = k
break
if (old_cell_id is not None) and (new_cell_id != old_cell_id):
# then a milestone transition has happened
diff = new_cell_id - old_cell_id
dest_milestone = new_cell_id - (diff + 1) // 2
if src_milestone != dest_milestone:
if src_milestone is not None:
assert src_milestone != dest_milestone
count_matrix[dest_milestone, src_milestone] += 1
time_vector[src_milestone, 0] += src_time

src_milestone = dest_milestone
src_time = 0.0

#print("count_matrix:", count_matrix)
#print("time_vector:", time_vector)

transition_matrix = np.zeros((num_milestones, num_milestones))
for i in range(num_milestones): # column index
column_sum = np.sum(count_matrix[:,i])
for j in range(num_milestones): # row index
if column_sum == 0.0:
transition_matrix[j,i] = 0.0
else:
transition_matrix[j,i] = count_matrix[j,i] / column_sum

rate_matrix = np.zeros((num_milestones, num_milestones))
for i in range(num_milestones):
for j in range(num_milestones):
if time_vector[j,0] == 0.0:
rate_matrix[i,j] = 0.0
else:
rate_matrix[i,j] = count_matrix[i,j] / time_vector[j,0]

#print("rate_matrix:", rate_matrix)

return count_matrix, time_vector, transition_matrix, rate_matrix

def make_time_profiles(transition_matrix, time_vector, num_milestones):
transit_time_matrix = np.zeros((num_milestones, num_milestones))
identity = np.identity(num_milestones)
for i in range(num_milestones):
transition_matrix_with_sink = np.copy(transition_matrix[:,:])
transition_matrix_with_sink[:,i] = 0.0
geom_series = np.linalg.inv(identity-transition_matrix_with_sink)
result = time_vector.T @ geom_series
transit_time_matrix[i,:] = result[0,:]

return transit_time_matrix
157 changes: 157 additions & 0 deletions openmm_ramd/analyze/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""
parser.py
Parse RAMD log files, and use the log files to construct trajectories of
CVs.
"""

import re
import os

import numpy as np

def parse_ramd_log_file(ramd_log_filename):
assert os.path.exists(ramd_log_filename), \
f"File not found: {ramd_log_filename}."
trajectories = []
trajectory = []
n_frames = 0
relative_COM_x = None
relative_COM_y = None
relative_COM_z = None
ligand_step = None
ligand_COM_x = None
ligand_COM_y = None
ligand_COM_z = None
with open(ramd_log_filename, "r") as f:
for i, line in enumerate(f.readlines()):
new_trajectory_frame = False
new_trajectory = False
# Determine the log output frequency
forceOutFreq_search = re.match("RAMD: forceOutFreq * (\d+)", line)
if forceOutFreq_search:
forceOutFreq = int(forceOutFreq_search.group(1))

forceRAMD_search = re.match("RAMD: forceRAMD * (\d+\.\d*)", line)
if forceRAMD_search:
forceRAMD = float(forceRAMD_search.group(1))

timeStep_search = re.match("RAMD: timeStep * (\d+\.\d*)", line)
if timeStep_search:
timeStep = float(timeStep_search.group(1))

# Find the lines that indicate the ligand COM
lig_com_search = re.match("RAMD FORCE: (\d+) > LIGAND COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
if lig_com_search:
old_ligand_step = ligand_step
ligand_step = int(lig_com_search.group(1))
old_ligand_COM_x = ligand_COM_x
old_ligand_COM_y = ligand_COM_y
old_ligand_COM_z = ligand_COM_z
ligand_COM_x = float(lig_com_search.group(2))
ligand_COM_y = float(lig_com_search.group(3))
ligand_COM_z = float(lig_com_search.group(4))
new_trajectory_frame = True

rec_com_search = re.match("RAMD FORCE: (\d+) > PROTEIN COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
if rec_com_search:
receptor_step = int(rec_com_search.group(1))
receptor_COM_x = float(rec_com_search.group(2))
receptor_COM_y = float(rec_com_search.group(3))
receptor_COM_z = float(rec_com_search.group(4))
relative_COM_x = ligand_COM_x - receptor_COM_x
relative_COM_y = ligand_COM_y - receptor_COM_y
relative_COM_z = ligand_COM_z - receptor_COM_z
continue

accel_search = re.match("RAMD: (\d+) >>> KEEP PREVIOUS ACCELERATION DIRECTION: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
if accel_search:
accel_step = int(accel_search.group(1))
accel_COM_x = float(accel_search.group(2))
accel_COM_y = float(accel_search.group(3))
accel_COM_z = float(accel_search.group(4))
continue

change_accel_search = re.match("RAMD: (\d+) >>> CHANGE ACCELERATION DIRECTION TO: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
if change_accel_search:
accel_step = int(change_accel_search.group(1))
#accel_COM_x = float(change_accel_search.group(2))
#accel_COM_y = float(change_accel_search.group(3))
#accel_COM_z = float(change_accel_search.group(4))
old_ligand_step = ligand_step
new_trajectory_frame = True
new_trajectory = True

if new_trajectory_frame:
if n_frames > 0:
if relative_COM_x is not None:
assert old_ligand_step == receptor_step, \
"The lines defining receptor and ligand COM are not the same RAMD step."
frame = [relative_COM_x, relative_COM_y, relative_COM_z,
accel_COM_x, accel_COM_y, accel_COM_z]
else:
frame = [old_ligand_COM_x, old_ligand_COM_y, old_ligand_COM_z,
accel_COM_x, accel_COM_y, accel_COM_z]

assert old_ligand_step == accel_step, \
"The lines defining ligand COM and accel vector are not the same RAMD step."

trajectory.append(frame)

n_frames += 1

if new_trajectory:

trajectories.append(trajectory)
trajectory = []
n_frames = 0

return trajectories, forceOutFreq, forceRAMD, timeStep

def condense_trajectories(trajectories):
"""
Reduce trajectories to 1D, and compute xi values
"""
# First, find the average position of all ligand positions, this will
# define our 1D CV
sum_ligx = 0.0
sum_ligy = 0.0
sum_ligz = 0.0
total_points = 0

for i, trajectory in enumerate(trajectories):
start_accx = None
for j, frame in enumerate(trajectory):
[ligx, ligy, ligz, accx, accy, accz] = frame
sum_ligx += ligx
sum_ligy += ligy
sum_ligz += ligz
total_points += 1
if start_accx is None:
start_accx = accx
else:
assert start_accx == start_accx, \
"Every frame must have the same acceleration vector"

avg_ligx = sum_ligx / total_points
avg_ligy = sum_ligy / total_points
avg_ligz = sum_ligz / total_points
avg_lig_length = np.sqrt(avg_ligx**2 + avg_ligy**2 + avg_ligz**2)
norm_avgx = avg_ligx / avg_lig_length
norm_avgy = avg_ligy / avg_lig_length
norm_avgz = avg_ligz / avg_lig_length

one_dim_trajs = []
for i, trajectory in enumerate(trajectories):
one_dim_traj = []
for j, frame in enumerate(trajectory):
[ligx, ligy, ligz, accx, accy, accz] = frame
# dot product between frame pos and avg_lig
cv_val = (ligx*norm_avgx + ligy*norm_avgy + ligz*norm_avgz)
# dot product between frame acc and avg_lig
xi_val = (accx*norm_avgx + accy*norm_avgy + accz*norm_avgz)
one_dim_traj.append([cv_val, xi_val])

one_dim_trajs.append(one_dim_traj)

return one_dim_trajs
Loading

0 comments on commit eff682f

Please sign in to comment.