Fixes to convergence plot rendering. Also, updates and improvements to documentation.

lvotapka · lvotapka · commit 63591b0a8f92 · 2024-06-10T14:30:55.000-06:00
diff --git a/README.md b/README.md
@@ -52,15 +52,14 @@ OpenMM is recommended for the molecular dynamics (MD) stage of SEEKR2. SEEKR2
 also needs the SEEKR2 OpenMM Plugin in order to use OpenMM for MD simulations.
 
 The easiest, quickest way to install the SEEKR2 OpenMM Plugin is to use
-Conda. If you don't already have Conda installed, Download Conda with 
-Python version 3.8 from 
-https://conda.io/en/latest/miniconda.html and run the downloaded script and 
-fill out the prompts. 
+Mamba. If you don't already have Mamba installed, download Mamba from 
+https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh 
+and run the downloaded script and fill out the prompts. 
 
-With Conda working, install the SEEKR2 OpenMM Plugin:
+With Mamba working, install the SEEKR2 OpenMM Plugin:
 
 ```
-conda install -c conda-forge seekr2_openmm_plugin
+mamba install seekr2_openmm_plugin
 ```
 One can test the installation by opening a Python terminal and typing:
 
@@ -72,14 +71,21 @@ If there is a problem related to not being able to find libOpenMM8.1, one
 can try specifying the OpenMM version:
 
 ```
-conda install -c conda-forge seekr2_openmm_plugin openmm=8.1
+mamba install seekr2_openmm_plugin openmm=8.1
+```
+
+If there is an error such as "CUDA_ERROR_UNSUPPORTED_PTX_VERSION", one can 
+try installing a different version of the CUDA Toolkit:
+
+```
+mamba install seekr2_openmm_plugin cudatoolkit=11.7
 ```
 
 If there is an error such as "No module named seekr2plugin", one can always 
 try installing an older version of OpenMM and CUDA:
 
 ```
-conda install -c conda-forge seekr2_openmm_plugin cudatoolkit=10.2 openmm=7.7
+mamba install seekr2_openmm_plugin cudatoolkit=10.2 openmm=7.7
 ```
 
 Alternatively, NAMD2 may be used for MD if desired. See the NAMD2 section
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -74,7 +74,7 @@ can try specifying the OpenMM version:
 If you get an error such as "CUDA_ERROR_UNSUPPORTED_PTX_VERSION", you might
 need to install with a different CUDA Toolkit version:
 
-``conda install seekr2_openmm_plugin cudatoolkit=11.7``
+``mamba install seekr2_openmm_plugin cudatoolkit=11.7``
 
 
 Installation of SEEKR2 itself begins with cloning and installing the SEEKR2 
@@ -401,4 +401,4 @@ Additional continuous integration tests may be run from the Python scripts in
 the seekr2/seekr2/continuous_integration/ directory if extra testing is
 desired.
 
-You should now be able to use SEEKR2.
+You should now be able to use SEEKR2.
diff --git a/seekr2/analyze.py b/seekr2/analyze.py
@@ -697,27 +697,32 @@ def save_plots(self, image_directory):
             plt.xticks(anchor_values, anchor_values, rotation=90)
             plt.ylabel("\u03C0_{\u03B1}")
             plt.xlabel("anchor value")
+            plt.yscale("log", nonpositive="mask")
             plt.tight_layout()
             pi_fig.savefig(os.path.join(image_directory, "pi_alpha.png"))
             
         # save p_i
         pi_fig, ax = plt.subplots()
-        plt.errorbar(np.round(milestone_values, 3), self.p_i, yerr=self.p_i_error, 
-                     ecolor="k", capsize=2)
-        plt.xticks(np.round(milestone_values, 3), np.round(milestone_values, 3), rotation=90)
+        plt.errorbar(np.round(milestone_values, 3), self.p_i, 
+                     yerr=self.p_i_error, ecolor="k", capsize=2)
+        plt.xticks(np.round(milestone_values, 3), np.round(milestone_values, 3),
+                   rotation=90)
         plt.ylabel("p_i")
         plt.xlabel("milestones")
+        plt.yscale("log", nonpositive="mask")
         plt.tight_layout()
         pi_fig.savefig(os.path.join(image_directory, "p_i.png"))
         # save free energy milestone profile
         pi_fig, ax = plt.subplots()
         plt.errorbar(np.round(milestone_values, 3), self.free_energy_profile, 
                  yerr=self.free_energy_profile_err, ecolor="k", capsize=2)
-        plt.xticks(np.round(milestone_values, 3), np.round(milestone_values, 3), rotation=90)
+        plt.xticks(np.round(milestone_values, 3), np.round(milestone_values, 3),
+                   rotation=90)
         plt.ylabel("\u0394G(milestone) (kcal/mol)")
         plt.xlabel("milestones")
         plt.tight_layout()
-        pi_fig.savefig(os.path.join(image_directory, "free_energy_profile_milestones.png"))
+        pi_fig.savefig(os.path.join(
+            image_directory, "free_energy_profile_milestones.png"))
         
         if self.free_energy_anchors is not None:
             # save free energy anchor profile
@@ -728,7 +733,8 @@ def save_plots(self, image_directory):
             plt.ylabel("\u0394G(anchor) (kcal/mol)")
             plt.xlabel("anchor")
             plt.tight_layout()
-            pi_fig.savefig(os.path.join(image_directory, "free_energy_profile_anchor.png"))
+            pi_fig.savefig(os.path.join(
+                image_directory, "free_energy_profile_anchor.png"))
         return
         
 def analyze(model, force_warning=False, num_error_samples=1000, 
diff --git a/seekr2/converge.py b/seekr2/converge.py
@@ -29,7 +29,7 @@ def converge(model, k_on_state=None, image_directory=None, verbose=False,
     
     if image_directory is None or image_directory == "" or not long_converge:
         return data_sample_list, times_dict
-        
+    
     k_off_fig, ax = common_converge.plot_scalar_conv(
         k_off_conv, max_step_list, title="$k_{off}$ Convergence", 
         label="k_{off} (s^{-1})", timestep_in_ns=timestep_in_ns)
diff --git a/seekr2/modules/check.py b/seekr2/modules/check.py
@@ -606,6 +606,37 @@ def recurse_atoms(atom, _visited_indices=set()):
             _visited_indices.update(branch_indices)
     return _visited_indices
 
+def check_atom_selections_dont_contain_hydrogen(model):
+    """
+    It has been observed that atom selections (for use as CVs) that
+    include hydrogen often cause numerical instabilities. This check
+    prints a failure message and returns False if any CV atom
+    selection includes a hydrogen atom; otherwise it returns True.
+    """
+    warnstr = """CHECK FAILURE: the atom selection for collective variable
+    (CV) number {} contains hydrogens, including atom index {}. It has
+    been observed that CV atom selections including hydrogen are often
+    unintentional, and tend to produce numerical instabilities, causing 
+    the simulations to blow up. Please ensure that this CV was defined 
+    by the intended atom selection and consider using selections that do 
+    not include any hydrogens.
+    """
+    for anchor in model.anchors:
+        structure = load_structure_with_parmed(model, anchor)
+        if structure is None:
+            continue
+        for cv in model.collective_variables:
+            atom_groups = cv.get_atom_groups()
+            for atom_group in atom_groups:
+                for atom_index in atom_group:
+                    atom = structure.atoms[atom_index]
+                    if atom.element == 1:
+                        # Then it's a hydrogen
+                        print(warnstr.format(cv.index, atom_index))
+                        return False
+                    
+    return True
+
 def check_atom_selections_on_same_molecule(model):
     """
     The user might accidentally define atom selections that span
@@ -621,7 +652,8 @@ def check_atom_selections_on_same_molecule(model):
     check the structure for anchor {} to ensure that atom indexing 
     is correct. Keep in mind: SEEKR2 atom indexing starts at 0 (but 
     PDB files often start with a different atom serial index, such
-    as 1 or another number)."""
+    as 1 or another number). This might also be caused by multimeric 
+    sites or unclosed loops."""
     warnstr2 = """CHECK FAILURE: the atom index {} for collective variable
     (CV) number {} does not exist in the structure for anchor {}."""
     for anchor in model.anchors:
@@ -910,6 +942,7 @@ def check_pre_simulation_all(model):
         # Skipping MD/BD salt conc. check because the best results seem to 
         # come from using no salt in BD.
         #check_passed_list.append(check_pre_sim_MD_and_BD_salt_concentration(model))
+        check_passed_list.append(check_atom_selections_dont_contain_hydrogen(model))
         check_passed_list.append(check_atom_selections_on_same_molecule(model))
         check_passed_list.append(check_atom_selections_MD_BD(model))
         check_passed_list.append(check_pqr_residues(model))
diff --git a/seekr2/modules/common_analyze.py b/seekr2/modules/common_analyze.py
@@ -582,7 +582,10 @@ def calculate_kinetics(self):
                             continue
                     bulk_milestones.append(milestone_id)
                     #bulk_milestone = milestone_id
-
+        
+        assert len(end_milestones) > 0, "No end (bound or otherwise) states "\
+            "defined in this model. Kinetics calculations will not work."
+        
         if np.any(self.Q.sum(axis=1) > 1.E-10):
             problem_milestone = np.argmin(self.Q.T.sum(axis=1))
             error_msg = """The rate matrix Q has a numerically overflowed row 
diff --git a/seekr2/modules/common_base.py b/seekr2/modules/common_base.py
@@ -33,7 +33,7 @@ def strBool(bool_str):
         raise Exception(
             "argument for strBool must be string either 'True' or 'False'.")
 
-def order_files_numerically(file_list, func=int, use_basename=False):
+def order_files_numerically(file_list, func=float, use_basename=False):
     """
     If there is a list of files, order them numerically, not
     alphabetically and return the sorted list of files. Note that
diff --git a/seekr2/modules/common_converge.py b/seekr2/modules/common_converge.py
@@ -28,7 +28,7 @@
 DEFAULT_SKIP = 0
 
 # The threshold beneath which to skip plotting the convergence
-MIN_PLOT_NORM = 1e-8
+MIN_PLOT_NORM = 1e-18
 
 # The interval between which to update the user on convergence script progress
 PROGRESS_UPDATE_INTERVAL = DEFAULT_NUM_POINTS // 10
@@ -394,8 +394,12 @@ def plot_scalar_conv(conv_values, conv_intervals, label, title, timestep_in_ns,
     ax : object
         matplotlib Axes object
     """
-    if not np.any(np.isfinite(conv_values)) or np.all(conv_values == 0):
-        return None, None
+    #if not np.any(np.isfinite(conv_values)) or np.all(conv_values == 0):
+    #    return None, None
+    for i, conv_value in enumerate(conv_values):
+        if not np.isfinite(conv_value) or conv_value == 0:
+            conv_values[i] = np.NAN
+    
     fig, ax = plt.subplots()
     ax.plot(np.multiply(conv_intervals, timestep_in_ns), conv_values, 
             linestyle="-", marker="o", markersize=1)
@@ -460,9 +464,11 @@ def plot_dict_conv(conv_dict, conv_intervals, label_base, unit, timestep_in_ns,
     for key in conv_dict:
         conv_values = conv_dict[key]
         if not np.all(np.isfinite(conv_values)):
+            print("skipping key:", key, "because values aren't finite")
             continue
         if skip_null:
             if np.linalg.norm(conv_values) < MIN_PLOT_NORM:
+                print("Skipping key:", key, "because values are too low")
                 continue
         if isinstance(key, tuple):
             label = "$" + label_base + "_{" + "\\rightarrow".join(
@@ -473,7 +479,7 @@ def plot_dict_conv(conv_dict, conv_intervals, label_base, unit, timestep_in_ns,
         elif isinstance(key, int):
             label = "$" + label_base + "_{" + str(key) + "(" + unit + ")}$"
             title = "$" + label_base + "_{" + str(key) + "}$"
-            name = label_base + "_{" + str(key) + "}"
+            name = label_base + "_" + str(key) + ""
         else:
             raise Exception("key type not implemented: {}".format(type(key)))
         
@@ -490,6 +496,7 @@ def plot_dict_conv(conv_dict, conv_intervals, label_base, unit, timestep_in_ns,
         ax_list.append(ax)
         title_list.append(title)
         name_list.append(name)
+        
     return fig_list, ax_list, title_list, name_list
 
 def calc_transition_steps(model, data_sample):
diff --git a/seekr2/modules/common_prepare.py b/seekr2/modules/common_prepare.py
@@ -1055,6 +1055,7 @@ def modify_model(old_model, new_model, root_directory, force_overwrite=False):
                 break
         if not alpha_paired:
             new_anchors_to_create.append(alpha)
+        
     for beta, anchor2 in enumerate(old_model.anchors):
         if anchor2.bulkstate:
             continue
@@ -1065,7 +1066,7 @@ def modify_model(old_model, new_model, root_directory, force_overwrite=False):
                 break
         if not beta_paired:
             old_anchors_to_delete.append(beta)
-    
+        
     # Now check all the paired anchors to see if anyone's milestones
     # have changed
     old_anchors_with_changed_milestones = []
diff --git a/seekr2/modules/runner_openmm.py b/seekr2/modules/runner_openmm.py
@@ -522,7 +522,9 @@ def run_mmvt(self, traj_filename):
         if trajectory_reporter_interval is not None:
             simulation.reporters.append(traj_reporter(
                 traj_filename, trajectory_reporter_interval, 
-                enforcePeriodicBox=False))
+                #enforcePeriodicBox=False))
+                # None leaves periodic-box wrapping to the reporter's default
+                enforcePeriodicBox=None))
             if self.restart_checkpoint_interval is not None:
                 assert trajectory_reporter_interval >= \
                     self.restart_checkpoint_interval
@@ -609,7 +611,8 @@ def run_elber(self, traj_filename):
                 and not self.umbrellas_already_exist_mode:
             umbrella_simulation.reporters.append(umbrella_traj_reporter(
                 umbrella_traj_filename, umbrella_trajectory_reporter_interval, 
-                enforcePeriodicBox=False))
+                #enforcePeriodicBox=False))
+                enforcePeriodicBox=None))
             if calc_settings.num_umbrella_stage_steps \
                     < umbrella_trajectory_reporter_interval:
                 umbrella_trajectory_reporter_interval \
@@ -748,7 +751,8 @@ def run_elber(self, traj_filename):
                     print("rev_traj_filename", rev_traj_filename)
                     rev_simulation.reporters = [rev_traj_reporter(
                         rev_traj_filename, rev_trajectory_reporter_interval, 
-                        enforcePeriodicBox=False)]
+                        #enforcePeriodicBox=False)]
+                        enforcePeriodicBox=None)]
                 if rev_energy_reporter_interval is not None:
                     rev_simulation.reporters.append(
                         self.sim_openmm.rev_energy_reporter(
@@ -806,7 +810,8 @@ def run_elber(self, traj_filename):
                             "forward_%d.dcd" % crossing_counter)
                         fwd_simulation.reporters = [fwd_traj_reporter(
                             fwd_traj_filename, fwd_trajectory_reporter_interval, 
-                            enforcePeriodicBox=False)]
+                            #enforcePeriodicBox=False)]
+                            enforcePeriodicBox=None)]
                     if fwd_energy_reporter_interval is not None:
                         fwd_simulation.reporters.append(
                             self.sim_openmm.fwd_energy_reporter(
diff --git a/seekr2/tests/test_common_base.py b/seekr2/tests/test_common_base.py
@@ -50,6 +50,13 @@ def test_order_files_numerically():
     for item1, item2 in zip(ordered_list, desired_list):
         assert item1==item2
     
+    string_list = ["/path/to/anchor_0.1/output0_0", 
+                   "/path/to/anchor_0.2/output0_1"]
+    
+    desired_list = string_list[:]
+    random.shuffle(string_list)
+    ordered_list = base.order_files_numerically(string_list)
+    
     return
 
 def test_box_vectors():