diff --git a/docs/create_docs.sh b/docs/create_docs.sh old mode 100644 new mode 100755 diff --git a/docs/images/Density.png b/docs/images/Density.png new file mode 100644 index 00000000..8784663e Binary files /dev/null and b/docs/images/Density.png differ diff --git a/docs/images/JSD_pdb.png b/docs/images/JSD_pdb.png new file mode 100644 index 00000000..3cc97ce7 Binary files /dev/null and b/docs/images/JSD_pdb.png differ diff --git a/docs/images/SSI_pdb.png b/docs/images/SSI_pdb.png new file mode 100644 index 00000000..c763834d Binary files /dev/null and b/docs/images/SSI_pdb.png differ diff --git a/docs/images/Torsions.jpg b/docs/images/Torsions.jpg new file mode 100644 index 00000000..91aabeed Binary files /dev/null and b/docs/images/Torsions.jpg differ diff --git a/docs/images/WaterFeatures.jpg b/docs/images/WaterFeatures.jpg new file mode 100644 index 00000000..1282f577 Binary files /dev/null and b/docs/images/WaterFeatures.jpg differ diff --git a/docs/images/Waters_and_Density.png b/docs/images/Waters_and_Density.png new file mode 100644 index 00000000..136e717e Binary files /dev/null and b/docs/images/Waters_and_Density.png differ diff --git a/docs/images/bb-clusts.png b/docs/images/bb-clusts.png new file mode 100644 index 00000000..adfc13fd Binary files /dev/null and b/docs/images/bb-clusts.png differ diff --git a/docs/images/bb-dists.png b/docs/images/bb-dists.png new file mode 100644 index 00000000..f24ad122 Binary files /dev/null and b/docs/images/bb-dists.png differ diff --git a/docs/images/receptor_bb-distances-distributions_jsd.pdf b/docs/images/receptor_bb-distances-distributions_jsd.pdf new file mode 100644 index 00000000..fe727a46 Binary files /dev/null and b/docs/images/receptor_bb-distances-distributions_jsd.pdf differ diff --git a/docs/images/receptor_combined-clusters_bb-torsions.pdf b/docs/images/receptor_combined-clusters_bb-torsions.pdf new file mode 100644 index 00000000..57594e81 Binary files /dev/null and b/docs/images/receptor_combined-clusters_bb-torsions.pdf differ diff --git a/docs/images/receptor_sc-torsions_jsd.pdf b/docs/images/receptor_sc-torsions_jsd.pdf new file mode 100644 index 00000000..67ec2e0b Binary files /dev/null and b/docs/images/receptor_sc-torsions_jsd.pdf differ diff --git a/docs/images/receptor_sc-torsions_ssi.pdf b/docs/images/receptor_sc-torsions_ssi.pdf new file mode 100644 index 00000000..98c37e89 Binary files /dev/null and b/docs/images/receptor_sc-torsions_ssi.pdf differ diff --git a/docs/images/sc-jsd.png b/docs/images/sc-jsd.png new file mode 100644 index 00000000..b5c42b29 Binary files /dev/null and b/docs/images/sc-jsd.png differ diff --git a/docs/images/sc-ssi.png b/docs/images/sc-ssi.png new file mode 100644 index 00000000..e07fb548 Binary files /dev/null and b/docs/images/sc-ssi.png differ diff --git a/docs/tut-2-preprocessing.rst b/docs/tut-2-preprocessing.rst index 740aaee2..e7f7fbd2 100644 --- a/docs/tut-2-preprocessing.rst +++ b/docs/tut-2-preprocessing.rst @@ -169,6 +169,10 @@ relatively rigid with sites that are spatially static, for example internal water cavities in membrane proteins. Here we demonstrate the preprocessing for water density, however the same procedure would be used for ions. +.. image:: images/Density.png + :height: 300px + :align: center + :alt: Density of protein Files and Directories --------------------- @@ -236,7 +240,7 @@ This helps us avoid memory errors with large python arrays. out_name_water_a+".gro", out_name_water_a+"_aligned.xtc", out_name_water_b+".gro", out_name_water_b+".xtc", atomgroup="OH2", write_grid_as="TIP3P", - out_name="traj/water_grid_ab_", + out_name="ab_grid_", use_memmap=True, memmap='traj/combined.mymemmap' ) diff --git a/docs/tut-3-featurization.rst b/docs/tut-3-featurization.rst index 2a09b484..0c47feb6 100644 --- a/docs/tut-3-featurization.rst +++ b/docs/tut-3-featurization.rst @@ -9,6 +9,12 @@ Note that all reader functions load the names of the features and their values separately. +.. image:: images/Torsions.jpg + :height: 300px + :align: center + :alt: Torsion angles. + + Basic Example ************* @@ -133,6 +139,12 @@ molecules occupy that are of interest. Water pocket featurization extracts a distribution that represents whether or not a specific protein cavity is occupied by a water molecule, and what that water molecule's orientation (polarisation) is. + +.. image:: images/WaterFeatures.jpg + :width: 300px + :align: center + :alt: Water features derived from density. + .. code:: python from pensa.features import read_water_features @@ -140,7 +152,7 @@ by a water molecule, and what that water molecule's orientation (polarisation) i For the pdb visualisation, the trajectory needs to be fit to the first frame of the simulation so that the density and protein align with each other. -Here we featurize the top 3 most probable water sites (top_waters = 3). +Here we featurize the top 2 most probable water sites (top_waters = 2). Orientation of the waters (water_data - spherical coordinates [radians]) is a timeseries distribution. When water is not present at the site, the orientation is recorded as 10000.0 to represent an empty state. If write=True, we can @@ -161,7 +173,7 @@ water water_feat, water_data = read_water_features( structure_input = struc, xtc_input = xtc, - top_waters = 1, + top_waters = 2, atomgroup = "OH2", write = True, write_grid_as="TIP3P", @@ -176,11 +188,11 @@ This way, sites are the same across both ensembles and can be compared. struc = "traj/condition-a_water.gro" xtc = "traj/condition-a_water_aligned.xtc" - grid = "traj/water_grid_ab_OH2_density.dx" + grid = "ab_grid_OH2_density.dx" water_feat, water_data = read_water_features( structure_input = struc, xtc_input = xtc, - top_waters = 5, + top_waters = 2, atomgroup = "OH2", grid_input = grid ) @@ -206,10 +218,10 @@ written (write=True) using the default density conversion "Angstrom^{-3}" in MDA atom_feat, atom_data = read_atom_features( structure_input = struc, xtc_input = xtc, - top_atoms = 1, + top_atoms = 2, atomgroup = "SOD", element = "Na", write = True, out_name = "features/11426_dyn_151_sodium" ) - \ No newline at end of file + diff --git a/docs/tut-4-comparison.rst b/docs/tut-4-comparison.rst index e0d90ac1..da789efe 100644 --- a/docs/tut-4-comparison.rst +++ b/docs/tut-4-comparison.rst @@ -87,6 +87,19 @@ distance per residue in the "B factor" field of a PDB file. y_label='max. JS dist. of BB torsions' ) + +.. image:: images/JSD_pdb.png + :height: 300px + :align: center + :alt: JSD pbd b-factor visualisaton file. + + +.. image:: images/sc-jsd.png + :height: 300px + :align: center + :alt: Jensen-Shannon Distance PDF output. + + Let's now save the resulting data in CSV files. .. code:: python @@ -159,6 +172,18 @@ We can plot the results in the same way as we did for the backbone analysis. ) +.. image:: images/SSI_pdb.png + :height: 300px + :align: center + :alt: SSI pbd b-factor visualisaton file. + +.. image:: images/sc-ssi.png + :height: 300px + :align: center + :alt: State-Specific Information PDF output. + + + Comparing Distances ------------------- @@ -194,3 +219,11 @@ divergence, Kolmogorov-Smirnov statistic etc. instead). names_bbdist, jsd_bbdist, "plots/receptor_jsd-bbdist.pdf", vmin = 0.0, vmax = 1.0, cbar_label='JSD' ) + + + +.. image:: images/bb-dists.png + :height: 300px + :align: center + :alt: JSD distances pbf plot. + diff --git a/docs/tut-5-dimensionality.rst b/docs/tut-5-dimensionality.rst index 5b939969..9333d9c6 100644 --- a/docs/tut-5-dimensionality.rst +++ b/docs/tut-5-dimensionality.rst @@ -199,6 +199,14 @@ We now perform the actual clustering on the combined data. ) cidx, cond, oidx, wss, centroids = cc + + + +.. image:: images/bb-clusts.png + :height: 300px + :align: center + :alt: BB torsion cluster pbf plot. + ... and save the results to a CSV file. .. code:: python diff --git a/pensa/features/hbond_features.py b/pensa/features/hbond_features.py index 634aab35..fa7bfbac 100644 --- a/pensa/features/hbond_features.py +++ b/pensa/features/hbond_features.py @@ -483,7 +483,7 @@ def read_water_site_h_bonds_quickly(structure_input, xtc_input, atomgroups, site else: g = Grid(grid_input) elif grid_input is None: - g = generate_grid(u, atomgroups) + g = generate_grid(u, atomgroups[0]) else: g = Grid(grid_input) diff --git a/pensa/features/water_features.py b/pensa/features/water_features.py index 1add901a..ced34702 100644 --- a/pensa/features/water_features.py +++ b/pensa/features/water_features.py @@ -202,7 +202,7 @@ def read_water_features(structure_input, xtc_input, atomgroup, top_waters=10, water_information.append([water_ID, list(atom_location), water_pocket_occupation_frequency]) # Write data out and visualize water sites in pdb - if write is True: + if out_name is not None: data_out(out_name + '_' + water_ID + '.txt', water_out) data_out(out_name + '_WaterSummary.txt', water_information) write_atom_to_pdb(pdb_outname, atom_location, water_ID, atomgroup) diff --git a/tutorial/PENSA_Tutorial_density_featurizer.py b/scripts/density_featurizer.py similarity index 54% rename from tutorial/PENSA_Tutorial_density_featurizer.py rename to scripts/density_featurizer.py index a726a77a..64fc1854 100755 --- a/tutorial/PENSA_Tutorial_density_featurizer.py +++ b/scripts/density_featurizer.py @@ -22,45 +22,43 @@ """ -struc = "mor-data/11426_dyn_151.pdb" -xtc = "mor-data/11423_trj_151.xtc" -water_feat, water_data = read_water_features( - structure_input=struc, - xtc_input=xtc, - top_waters=1, - atomgroup="OH2", - write=True, - write_grid_as="TIP3P", - out_name="11426_dyn_151" -) +# struc = "mor-data/11426_dyn_151.pdb" +# xtc = "mor-data/11423_trj_151.xtc" +# water_feat, water_data = read_water_features( +# structure_input=struc, +# xtc_input=xtc, +# top_waters=1, +# atomgroup="OH2", +# write_grid_as="TIP3P", +# out_name="11426_dyn_151" +# ) -# We can use the get_atom_features, which provides the same -# functionality but ignores orientations as atoms are considered spherically symmetric. +# # We can use the get_atom_features, which provides the same +# # functionality but ignores orientations as atoms are considered spherically symmetric. -struc = "mor-data/11426_dyn_151.pdb" +# struc = "mor-data/11426_dyn_151.pdb" -xtc = "mor-data/11423_trj_151.xtc" +# xtc = "mor-data/11423_trj_151.xtc" -# Here we locate the sodium site which has the highest probability -# The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis +# # Here we locate the sodium site which has the highest probability +# # The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis -atom_feat, atom_data = read_atom_features( - structure_input=struc, - xtc_input=xtc, - top_atoms=1, - atomgroup="SOD", - element="Na", - write=True, - out_name="11426_dyn_151" -) +# atom_feat, atom_data = read_atom_features( +# structure_input=struc, +# xtc_input=xtc, +# top_atoms=1, +# atomgroup="SOD", +# element="Na", +# out_name="11426_dyn_151" +# ) # If we have already obtained the grid, we can speed up featurization by reading it in. struc = "mor-data/11426_dyn_151.pdb" xtc = "mor-data/11423_trj_151.xtc" -grid = "dens/11426_dyn_151OH2_density.dx" +grid = "11426_dyn_151_OH2_density.dx" water_feat, water_data = read_water_features( structure_input=struc, xtc_input=xtc, diff --git a/tutorial/PENSA_Tutorial_hbond_featurizer.py b/scripts/hbond_featurizer.py similarity index 71% rename from tutorial/PENSA_Tutorial_hbond_featurizer.py rename to scripts/hbond_featurizer.py index 2ddab201..e23f3aea 100644 --- a/tutorial/PENSA_Tutorial_hbond_featurizer.py +++ b/scripts/hbond_featurizer.py @@ -1,4 +1,4 @@ -from pensa.features import read_cavity_bonds, read_h_bonds +from pensa.features import hbond_features, atom_features # Featurize water cavity hydrogen bonds @@ -8,26 +8,24 @@ pdb_file_a = root_dir + '/11426_dyn_151.pdb' trj_file_a = root_dir + '/11423_trj_151.xtc' -names, data = read_cavity_bonds( +names, data = hbond_features.read_water_site_h_bonds_quickly( ref_file_a, trj_file_a, atomgroups=['OH2', 'H1', 'H2'], site_IDs=[1, 2], grid_input=None, - write=True, write_grid_as='TIP3P', - out_name='11423_trj_169' + out_name='11423_trj_151' ) -# Faturize ligand-protein hydrogen bonds +# Featurize ligand-protein hydrogen bonds ref_file_a = root_dir + '/11580_dyn_169.psf' pdb_file_a = root_dir + '/11579_dyn_169.pdb' trj_file_a = root_dir + '/11578_trj_169.xtc' -names, data = read_h_bonds( +names, data = hbond_features.read_h_bonds( ref_file_a, trj_file_a, fixed_group='resname 4VO', dyn_group='protein', - write=True, out_name='4VO_hbonds' ) diff --git a/tutorial/PENSA_Tutorial_SSI.py b/scripts/state_specific_info.py similarity index 86% rename from tutorial/PENSA_Tutorial_SSI.py rename to scripts/state_specific_info.py index f608c438..1485d3df 100755 --- a/tutorial/PENSA_Tutorial_SSI.py +++ b/scripts/state_specific_info.py @@ -52,7 +52,7 @@ if not os.path.exists(subdir): os.makedirs(subdir) -# Extract the coordinates of the receptor from the trajectory +# # # Extract the coordinates of the receptor from the trajectory extract_coordinates( ref_file_a, pdb_file_a, trj_file_a, out_name_a + "_receptor", sel_base_a @@ -62,7 +62,7 @@ out_name_b + "_receptor", sel_base_b ) -# Extract the features from the beginning (start_frame) of the trajectory +# # Extract the features from the beginning (start_frame) of the trajectory start_frame = 0 a_rec = read_structure_features( out_name_a + "_receptor.gro", @@ -81,9 +81,9 @@ out_name_a = "condition-a" out_name_b = "condition-b" -# Extract the multivariate torsion coordinates of each residue as a -# timeseries from the trajectory and write into subdirectory -# output = [[torsion 1 timeseries], [torsion 2 timeseries], ..., [torsion n timeseries]] +# # Extract the multivariate torsion coordinates of each residue as a +# # timeseries from the trajectory and write into subdirectory +# # output = [[torsion 1 timeseries], [torsion 2 timeseries], ..., [torsion n timeseries]] sc_multivar_res_feat_a, sc_multivar_res_data_a = get_multivar_res_timeseries( a_rec_feat, a_rec_data, 'sc-torsions', write=True, out_name=out_name_a ) @@ -96,16 +96,16 @@ discretize='gaussian', pbc=True ) -# We can calculate the State Specific Information (SSI) shared between the -# ensemble switch and the combined ensemble residue conformations. As the ensemble -# is a binary change, SSI can exist within the range [0, 1] units=bits. -# 0 bits = no information, 1 bits = maximum information, i.e. you can predict the state of the ensemble -# with certainty from the state of the residue. -# Set write_plots = True to generate a folder with all the clustered states for each residue. +# # We can calculate the State Specific Information (SSI) shared between the +# # ensemble switch and the combined ensemble residue conformations. As the ensemble +# # is a binary change, SSI can exist within the range [0, 1] units=bits. +# # 0 bits = no information, 1 bits = maximum information, i.e. you can predict the state of the ensemble +# # with certainty from the state of the residue. +# # Set write_plots = True to generate a folder with all the clustered states for each residue. data_names, data_ssi = ssi_ensemble_analysis( sc_multivar_res_feat_a['sc-torsions'], sc_multivar_res_feat_b['sc-torsions'], sc_multivar_res_data_a['sc-torsions'], sc_multivar_res_data_b['sc-torsions'], - discrete_states_ab, verbose=True, write_plots=False + discrete_states_ab, verbose=True, write_plots=True ) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -149,7 +149,7 @@ # Extract the combined density of the waters in both ensembles a and b extract_combined_grid( - out_name_a + ".gro", "dens/cond-a_wateraligned.xtc", + out_name_a + ".gro", "traj/cond-a_water_aligned.xtc", out_name_b + ".gro", out_name_b + ".xtc", atomgroup="OH2", write_grid_as="TIP3P", @@ -157,18 +157,17 @@ use_memmap=True ) -grid_combined = "dens/ab_grid_OH2_density.dx" +grid_combined = "ab_grid_OH2_density.dx" # Then we featurize the waters common to both simulations # We can do the same analysis for ions using the get_atom_features featurizer. water_feat_a, water_data_a = read_water_features( structure_input=out_name_a + ".gro", - xtc_input="dens/cond-a_wateraligned.xtc", + xtc_input="traj/cond-a_water_aligned.xtc", top_waters=2, atomgroup="OH2", grid_input=grid_combined, - write=True, out_name="cond_a" ) @@ -178,15 +177,14 @@ top_waters=2, atomgroup="OH2", grid_input=grid_combined, - write=True, out_name="cond_b" ) -# Calculating SSI is then exactly the same as for residues +# Calculating SSI is then exactly the same as for residues, with the h2o argument set to True. discrete_states_ab1 = get_discrete_states( water_data_a['WaterPocket_Distr'], water_data_b['WaterPocket_Distr'], - discretize='gaussian', pbc=True + discretize='gaussian', pbc=True, h2o=True, write_plots=True ) # SSI shared between waters and the switch between ensemble conditions @@ -198,7 +196,7 @@ # Alternatively we can see if the pocket occupancy (the presence/absence of water at the site) shares SSI # Currently this is only enabled with ssi_ensemble_analysis. We need to turn off the periodic boundary conditions -# as the distributions are no longer periodic. +# as the distributions are no longer periodic. discrete_states_ab2 = get_discrete_states( water_data_a['WaterPocket_OccupDistr'], diff --git a/tutorial/0-download.sh b/tutorial/0-download.sh old mode 100644 new mode 100755 diff --git a/tutorial/1-preprocessing-gpcrmd.sh b/tutorial/1-preprocessing-gpcrmd.sh old mode 100644 new mode 100755 diff --git a/tutorial/2-comparison-of-feature-distributions.sh b/tutorial/2-comparison-of-feature-distributions.sh old mode 100644 new mode 100755 diff --git a/tutorial/3-principal-component-analysis.sh b/tutorial/3-principal-component-analysis.sh old mode 100644 new mode 100755 diff --git a/tutorial/4-clustering.sh b/tutorial/4-clustering.sh old mode 100644 new mode 100755 diff --git a/tutorial/5-density-hbond-featurization.sh b/tutorial/5-density-hbond-featurization.sh new file mode 100755 index 00000000..b2936bec --- /dev/null +++ b/tutorial/5-density-hbond-featurization.sh @@ -0,0 +1,8 @@ +#!/bin/bash + + + +#python ../scripts/hbond_featurizer.py + +python ../scripts/density_featurizer.py + diff --git a/tutorial/6-state-specific-information.sh b/tutorial/6-state-specific-information.sh new file mode 100755 index 00000000..7a98c8e7 --- /dev/null +++ b/tutorial/6-state-specific-information.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +python ../scripts/state_specific_info.py + diff --git a/tutorial/runsh.sh b/tutorial/runsh.sh new file mode 100755 index 00000000..bae5a4e9 --- /dev/null +++ b/tutorial/runsh.sh @@ -0,0 +1,16 @@ +#!/bin/bash + + + +./1-preprocessing-gpcrmd.sh + +./2-comparison-of-feature-distributions.sh + +./3-principal-component-analysis.sh + +./4-clustering.sh + +./5-density-hbond-featurization.sh + +./6-state-specific-information.sh +-