Added analyze scripts, also temperature to log file

seekrcentral · Jun 16, 2023 · eff682f · eff682f
1 parent c460df6
commit eff682f
Show file tree

Hide file tree

Showing 6 changed files with 569 additions and 5 deletions.
diff --git a/openmm_ramd/analyze/analyze_log.py b/openmm_ramd/analyze/analyze_log.py
@@ -0,0 +1,118 @@
+"""
+analyze_log.py
+
+Read and parse a RAMD log file. Construct a series of milestoning models from 
+the trajectory fragments for a set of bins of RAMD force vectors. The 
+information from the milestoning models will be used in a sigma-RAMD model
+that will allow one to expand the residence time in a Taylor series expansion
+according to sigma-RAMD theory.
+"""
+
+import sys
+import argparse
+
+import numpy as np
+
+import parser
+import milestoning
+import sigma_ramd
+
+def analyze_log(ramd_log_file_list, num_milestones=10, num_xi_bins=5, 
+                verbose=True):
+    if verbose:
+        print(f"Using {num_milestones} milestones.")
+        print(f"Using {num_xi_bins} angle bins.")
+    xi_bins_ranges = np.linspace(-1.0, 1.0, num_xi_bins+1)
+    trajectory_list = []
+    forceOutFreq = None
+    forceRAMD = None
+    timeStep = None
+    for ramd_log_filename in ramd_log_file_list:
+        trajectories, forceOutFreq_, forceRAMD_, timeStep_ \
+            = parser.parse_ramd_log_file(ramd_log_filename)
+        if forceOutFreq is None:
+            forceOutFreq = forceOutFreq_
+            forceRAMD = forceRAMD_
+            timeStep = timeStep_
+        else:
+            assert forceOutFreq == forceOutFreq_, "RAMD logs contain differing forceOutFreq values."
+            assert forceRAMD == forceRAMD_, "RAMD logs contain differing forceRAMD values."
+            assert timeStep == timeStep_, "RAMD logs contain differing timeStep values."
+
+        trajectory_list += trajectories
+
+    if verbose:
+        print("Number of trajectories extracted from log file(s):", 
+              len(trajectory_list))
+        print("forceOutFreq:", forceOutFreq)
+        print("forceRAMD:", forceRAMD, "kcal/(mole * Angstrom)")
+        print("timeStep:", timeStep, "ps")
+
+    # Each trajectory represents a set of frames between force changes
+    # Align the trajectories and convert to 1D, and xi values
+    one_dim_trajs = parser.condense_trajectories(trajectories)
+
+    #Construct a milestoning model based on trajectory fragments
+    milestones, min_location, max_location, starting_cv_val \
+        = milestoning.uniform_milestone_locations(one_dim_trajs, num_milestones)
+
+    xi_bins = []
+    for j in range(num_xi_bins):
+        xi_bins.append([])
+
+    for i, trajectory in enumerate(one_dim_trajs):
+        traj_xi = trajectory[0][1]
+        found_bin = False
+        for j in range(num_xi_bins):
+            if (traj_xi >= xi_bins_ranges[j]) \
+                    and (traj_xi <= xi_bins_ranges[j+1]):
+                xi_bins[j].append(trajectory)
+                found_bin = True
+                break
+
+        if not found_bin:
+            raise Exception(f"xi value {traj_xi} not placed in bin.")
+
+    frame_time = timeStep * forceOutFreq
+    xi_bin_time_profiles = np.zeros((num_xi_bins, num_milestones, num_milestones))
+    for i in range(num_xi_bins):
+        if verbose:
+            print(f"num in bin {i}: {len(xi_bins[i])}")
+        xi_bin_trajs = xi_bins[i]
+        count_matrix, time_vector, rate_matrix, transition_matrix \
+            = milestoning.make_milestoning_model(xi_bin_trajs, milestones, frame_time, num_milestones)
+
+        # use the milestoning model to construct time profiles
+
+        time_profile_matrix = milestoning.make_time_profiles(transition_matrix, time_vector, num_milestones)
+        xi_bin_time_profiles[i,:,:] = time_profile_matrix[:,:]
+
+    # Use sigma-RAMD to analyze
+    beta=0.0 # TODO: fill out
+    calc = sigma_ramd.functional_expansion_1d_ramd(
+        xi_bin_time_profiles, force_constant=forceRAMD, beta=beta,
+        min_cv_val=min_location, max_cv_val=max_location, 
+        starting_cv_val=starting_cv_val)
+    time_estimate = calc.make_second_order_time_estimate()
+    print("time_estimate:", time_estimate)
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(description=__doc__)
+    argparser.add_argument(
+        "ramdLogFiles", metavar="RAMDLOGFILES", type=str, nargs="+",
+        help="The RAMD log files to open and parse.")
+    argparser.add_argument(
+        "-n", "--numMilestones", dest="numMilestones", default=10,
+        help="The number of milestones to use in the milestoning "\
+        "calculation.", type=int)
+    argparser.add_argument(
+        "-a", "--numAngleBins", dest="numAngleBins", default=5,
+        help="The number of bins to divide the force vectors into.", type=int)
+
+    args = argparser.parse_args()
+    args = vars(args)
+    ramdLogFiles = args["ramdLogFiles"]
+    numMilestones = args["numMilestones"]
+    numAngleBins = args["numAngleBins"]
+
+    analyze_log(ramdLogFiles, numMilestones, numAngleBins)
diff --git a/openmm_ramd/analyze/milestoning.py b/openmm_ramd/analyze/milestoning.py
@@ -0,0 +1,109 @@
+"""
+openmm_ramd/analyze/milestoning.py
+
+Prepare and utilize a milestoning model for RAMD simulation to use
+in a sigma-RAMD analysis.
+"""
+
+import numpy as np
+
+def uniform_milestone_locations(one_dim_trajs, num_milestones):
+    """
+    Define a set of milestone locations uniformly spread.
+    """
+    # First, locate the minimum and maximum positions along the CV
+    min_location = 9e9
+    max_location = -9e9
+    starting_cv_val = None
+    for i, trajectory in enumerate(one_dim_trajs):
+        for j, frame in enumerate(trajectory):
+            [cv_val, xi_val] = frame
+            if i == 0 and j == 0:
+                starting_cv_val = cv_val
+            if cv_val < min_location:
+                min_location = cv_val
+            if cv_val > max_location:
+                max_location = cv_val
+
+    assert min_location < max_location, \
+        "The minimum must be less than the maximum."
+
+    assert starting_cv_val is not None, "No starting CV value found."
+
+    span = max_location - min_location
+    h = span / (num_milestones + 1)
+    milestones = []
+    for i in range(num_milestones):
+        milestone = h*(i+1)
+        milestones.append(milestone)
+
+    return milestones, min_location, max_location, starting_cv_val
+
+def make_milestoning_model(
+        xi_bin_trajs, milestones, frame_time, num_milestones):
+    count_matrix = np.zeros((num_milestones, num_milestones), dtype=np.int32)
+    time_vector = np.zeros((num_milestones, 1))
+
+    for i, traj in enumerate(xi_bin_trajs):
+        src_milestone = None
+        src_time = 0.0
+        new_cell_id = None
+        src_time = -frame_time
+        for j, frame in enumerate(traj):
+            [cv_val, xi_val] = frame
+            # Find what cell it's in
+            old_cell_id = new_cell_id
+            new_cell_id = num_milestones
+            src_time += frame_time
+            for k, milestone in enumerate(milestones):
+                if cv_val < milestone:
+                    new_cell_id = k
+                    break
+            if (old_cell_id is not None) and (new_cell_id != old_cell_id):
+                # then a milestone transition has happened
+                diff = new_cell_id - old_cell_id
+                dest_milestone = new_cell_id - (diff + 1) // 2
+                if src_milestone != dest_milestone:
+                    if src_milestone is not None:
+                        assert src_milestone != dest_milestone
+                        count_matrix[dest_milestone, src_milestone] += 1
+                        time_vector[src_milestone, 0] += src_time
+
+                    src_milestone = dest_milestone
+                    src_time = 0.0
+
+    #print("count_matrix:", count_matrix)
+    #print("time_vector:", time_vector)
+
+    transition_matrix = np.zeros((num_milestones, num_milestones))
+    for i in range(num_milestones): # column index
+        column_sum = np.sum(count_matrix[:,i])
+        for j in range(num_milestones): # row index
+            if column_sum == 0.0:
+                transition_matrix[j,i] = 0.0
+            else:
+                transition_matrix[j,i] = count_matrix[j,i] / column_sum
+
+    rate_matrix = np.zeros((num_milestones, num_milestones))
+    for i in range(num_milestones):
+        for j in range(num_milestones):
+            if time_vector[j,0] == 0.0:
+                rate_matrix[i,j] = 0.0
+            else:
+                rate_matrix[i,j] = count_matrix[i,j] / time_vector[j,0]
+
+    #print("rate_matrix:", rate_matrix)
+
+    return count_matrix, time_vector, transition_matrix, rate_matrix
+
+def make_time_profiles(transition_matrix, time_vector, num_milestones):
+    transit_time_matrix = np.zeros((num_milestones, num_milestones))
+    identity = np.identity(num_milestones)
+    for i in range(num_milestones):
+        transition_matrix_with_sink = np.copy(transition_matrix[:,:])
+        transition_matrix_with_sink[:,i] = 0.0
+        geom_series = np.linalg.inv(identity-transition_matrix_with_sink)
+        result = time_vector.T @ geom_series
+        transit_time_matrix[i,:] = result[0,:]
+
+    return transit_time_matrix
diff --git a/openmm_ramd/analyze/parser.py b/openmm_ramd/analyze/parser.py
@@ -0,0 +1,157 @@
+"""
+parser.py
+
+Parse RAMD log files, and use the log files to construct trajectories of
+CVs.
+"""
+
+import re
+import os
+
+import numpy as np
+
+def parse_ramd_log_file(ramd_log_filename):
+    assert os.path.exists(ramd_log_filename), \
+        f"File not found: {ramd_log_filename}."
+    trajectories = []
+    trajectory = []
+    n_frames = 0
+    relative_COM_x = None
+    relative_COM_y = None
+    relative_COM_z = None
+    ligand_step = None
+    ligand_COM_x = None
+    ligand_COM_y = None
+    ligand_COM_z = None
+    with open(ramd_log_filename, "r") as f:
+        for i, line in enumerate(f.readlines()):
+            new_trajectory_frame = False
+            new_trajectory = False
+            # Determine the log output frequency
+            forceOutFreq_search = re.match("RAMD: forceOutFreq * (\d+)", line)
+            if forceOutFreq_search:
+                forceOutFreq = int(forceOutFreq_search.group(1))
+
+            forceRAMD_search = re.match("RAMD: forceRAMD * (\d+\.\d*)", line)
+            if forceRAMD_search:
+                forceRAMD = float(forceRAMD_search.group(1))
+
+            timeStep_search = re.match("RAMD: timeStep * (\d+\.\d*)", line)
+            if timeStep_search:
+                timeStep = float(timeStep_search.group(1))
+
+            # Find the lines that indicate the ligand COM
+            lig_com_search = re.match("RAMD FORCE: (\d+) > LIGAND COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
+            if lig_com_search:
+                old_ligand_step = ligand_step
+                ligand_step = int(lig_com_search.group(1))
+                old_ligand_COM_x = ligand_COM_x
+                old_ligand_COM_y = ligand_COM_y
+                old_ligand_COM_z = ligand_COM_z
+                ligand_COM_x = float(lig_com_search.group(2))
+                ligand_COM_y = float(lig_com_search.group(3))
+                ligand_COM_z = float(lig_com_search.group(4))
+                new_trajectory_frame = True
+
+            rec_com_search = re.match("RAMD FORCE: (\d+) > PROTEIN COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
+            if rec_com_search:
+                receptor_step = int(rec_com_search.group(1))
+                receptor_COM_x = float(rec_com_search.group(2))
+                receptor_COM_y = float(rec_com_search.group(3))
+                receptor_COM_z = float(rec_com_search.group(4))
+                relative_COM_x = ligand_COM_x - receptor_COM_x
+                relative_COM_y = ligand_COM_y - receptor_COM_y
+                relative_COM_z = ligand_COM_z - receptor_COM_z
+                continue
+
+            accel_search = re.match("RAMD: (\d+)    >>> KEEP PREVIOUS ACCELERATION DIRECTION: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
+            if accel_search:
+                accel_step = int(accel_search.group(1))
+                accel_COM_x = float(accel_search.group(2))
+                accel_COM_y = float(accel_search.group(3))
+                accel_COM_z = float(accel_search.group(4))
+                continue
+
+            change_accel_search = re.match("RAMD: (\d+)    >>> CHANGE ACCELERATION DIRECTION TO: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line)
+            if change_accel_search:
+                accel_step = int(change_accel_search.group(1))
+                #accel_COM_x = float(change_accel_search.group(2))
+                #accel_COM_y = float(change_accel_search.group(3))
+                #accel_COM_z = float(change_accel_search.group(4))
+                old_ligand_step = ligand_step
+                new_trajectory_frame = True
+                new_trajectory = True
+
+            if new_trajectory_frame:
+                if n_frames > 0:
+                    if relative_COM_x is not None:
+                        assert old_ligand_step == receptor_step, \
+                            "The lines defining receptor and ligand COM are not the same RAMD step."
+                        frame = [relative_COM_x, relative_COM_y, relative_COM_z,
+                                 accel_COM_x, accel_COM_y, accel_COM_z]
+                    else:
+                        frame = [old_ligand_COM_x, old_ligand_COM_y, old_ligand_COM_z,
+                                 accel_COM_x, accel_COM_y, accel_COM_z]
+
+                    assert old_ligand_step == accel_step, \
+                        "The lines defining ligand COM and accel vector are not the same RAMD step."
+
+                    trajectory.append(frame)
+
+                n_frames += 1
+
+            if new_trajectory:
+
+                trajectories.append(trajectory)
+                trajectory = []
+                n_frames = 0
+
+    return trajectories, forceOutFreq, forceRAMD, timeStep
+
+def condense_trajectories(trajectories):
+    """
+    Reduce trajectories to 1D, and compute xi values
+    """
+    # First, find the average position of all ligand positions, this will
+    # define our 1D CV
+    sum_ligx = 0.0
+    sum_ligy = 0.0
+    sum_ligz = 0.0
+    total_points = 0
+
+    for i, trajectory in enumerate(trajectories):
+        start_accx = None
+        for j, frame in enumerate(trajectory):
+            [ligx, ligy, ligz, accx, accy, accz] = frame
+            sum_ligx += ligx
+            sum_ligy += ligy
+            sum_ligz += ligz
+            total_points += 1
+            if start_accx is None:
+                start_accx = accx
+            else:
+                assert start_accx == start_accx, \
+                    "Every frame must have the same acceleration vector"
+
+    avg_ligx = sum_ligx / total_points
+    avg_ligy = sum_ligy / total_points
+    avg_ligz = sum_ligz / total_points
+    avg_lig_length = np.sqrt(avg_ligx**2 + avg_ligy**2 + avg_ligz**2)
+    norm_avgx = avg_ligx / avg_lig_length
+    norm_avgy = avg_ligy / avg_lig_length
+    norm_avgz = avg_ligz / avg_lig_length
+
+    one_dim_trajs = []
+    for i, trajectory in enumerate(trajectories):
+        one_dim_traj = []
+        for j, frame in enumerate(trajectory):
+            [ligx, ligy, ligz, accx, accy, accz] = frame
+            # dot product between frame pos and avg_lig
+            cv_val = (ligx*norm_avgx + ligy*norm_avgy + ligz*norm_avgz)
+            # dot product between frame acc and avg_lig
+            xi_val = (accx*norm_avgx + accy*norm_avgy + accz*norm_avgz)
+            one_dim_traj.append([cv_val, xi_val])
+
+        one_dim_trajs.append(one_dim_traj)
+
+    return one_dim_trajs