-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
285 lines (228 loc) · 10 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# Set of functions to visualize MSLD trajectories
# Written by: Luis F Cervantes and Furyal Ahmed (2/23)
import numpy as np
import sys
import string
import os
from functools import reduce
import pycharmm
import pycharmm.lingo as lingo
import pycharmm.generate as gen
import pycharmm.ic as ic
import pycharmm.coor as coor
import pycharmm.energy as energy
import pycharmm.dynamics as dyn
import pycharmm.nbonds as nbonds
import pycharmm.minimize as minimize
import pycharmm.crystal as crystal
import pycharmm.image as image
import pycharmm.psf as psf
import pycharmm.read as read
import pycharmm.write as write
import pycharmm.settings as settings
import pycharmm.cons_harm as cons_harm
import pycharmm.cons_fix as cons_fix
import pycharmm.select as select
import pycharmm.shake as shake
import pycharmm.charmm_file as charmm_file
from pycharmm.lib import charmm as libcharmm
def take_overlap(*input):
"""
Given an unpacked list `input` of np.arrays,
take the overlap of the arrays given their first
column.
In:
`input` : unpacked list of np.arrays
Out:
`result`: list of np.arrays
"""
n = len(input)
maxIndex = max(array[:, 0].max() for array in input)
indicator = np.zeros(maxIndex + 1, dtype=int)
for array in input:
indicator[array[:, 0]] += 1
indicator = indicator == n
result = []
for array in input:
# Look up each integer in the indicator array
mask1 = indicator[array[:, 0]]
# Use boolean indexing to get the sub array
result.append(array[mask1])
return result
def get_subs_on(LambdaFile, nsubs, cutoff=0.99, nsavc=10000, nsavl=10):
"""
Create subs_on matrix of shape (nframes x nsites), where
A_{i,j} is the substituent index that is on at the ith frame
and jth site (zero-based indexing)
In:
LambdaFile (str) : path to lambda trajectory file
nsubs [int, int,...] : list of nsubs per site
cutoff (float) : lambda cutoff
nsavc (int) : freq of saving frames
nsavl (int) : freq of saving lambdas
Out:
subs_on 2-D np.array : subs_on matrix
"""
# Check to make sure frames align with lambda values
assert (nsavc/nsavl).is_integer, f"Frequency of saving lambdas and frames are not multiples of each other"
# Define sites and skip
nsites = len(nsubs)
skip = int(nsavc/nsavl)
# Load lambda trajectory
lams = np.loadtxt(LambdaFile)
# Extract physical substituents based on cutoff
physical_subs = []
lambdasPerFrame = []
for site in range(nsites):
# Find indices for all subs at one site
if site == 0:
index1 = 0
else:
index1 = np.cumsum(nsubs[:site+1])[site-1]
index2 = np.cumsum(nsubs[:site+1])[-1] -1
# Get lambdas for the site based on indices
lams_site = lams[skip-1::skip,index1:index2+1]
lambdasPerFrame.append(lams_site)
# Retrive physical substituents on a site
mask = lams_site >= cutoff
subs_on = np.argwhere(mask)
physical_subs.append(subs_on)
# Assemble lambda trajectory for each frame
lambdasPerFrame = np.concatenate(lambdasPerFrame,1)
# Assemble array of physical ligands at each frame
subs_on = physical_subs[0]
if nsites != 1:
# Get full physical end states only
physical_subs = take_overlap(*physical_subs)
subs_on = physical_subs[0]
for arr in physical_subs[1:]:
subs_on = np.append(subs_on,np.reshape(arr[:,1],(subs_on.shape[0],1)),1)
return subs_on, lambdasPerFrame
def streamFileToCHARMM(FilePath):
"""
Read a stream file in CHARMM via pyCHARMM lingo
In:
FilePath (str): path to file to stream
Out:
CHARMM output in stdout
"""
# Open file
with open(FilePath,'r') as f:
stream = f.readlines()
# Exclude title from stream
stream = [line for line in stream if not line.startswith('*')]
stream = ''.join(stream)
# Stream
lingo.charmm_script(stream)
def grab_frames(sysname, nsubs, subs_on, StructureFile, nsteps, nsavc, ns, eqS, ini, dup, MSLDInpPath=f'./prep/', NoSolv=False):
"""
Given system specific properties and output from get_subs_on, return
a directory with frames for each physical ligand.
In:
sysname (str) : MSLD system name
nsubs [int, int,...] : list of nsubs per site
subs_on (float) : np.array of size (nframes x sum(nsubs)+1)
denotes frame index at col1 and subsituent
index at each site on the rest of the cols
StructureFile (str) : path to MSLD system PSF
nsteps (int) : number of steps per ns trajectory
nsavc (str) : freq of saving coordinates
ns (int) : total number of ns
eqS (int) : number of ns to discard as equilibration
ini (int) : ALF iteration from which to grab trajectories
dup (str) : letter a-z denoting which duplicate to grab
trajectory from
MSLDInpPath (str) : path to MSLD {sysname}.inp file
NoSolv (bool) : option to exclude solvent from frames
(excludes water and ions)
Out:
subs_on 2-D np.array : subs_on matrix
"""
# Define total number of frames per ns
nf = int(nsteps/nsavc)
# Split subs_on
physicalFrames = subs_on[:,0]
subs = subs_on[:,1:]
# Make frames direcotry if it doesn't exist
framesPath = f'./run{ini}{dup}_frames'
if not os.path.exists(framesPath):
os.mkdir(framesPath)
# Define trajectory path
TrajPath = f'./run{ini}{dup}/dcd/'
# Stream MSLD {sysname}.inp and associated stream files
variablesFile = f'./variables{ini}.inp'
MSLDInpFile = os.path.join(MSLDInpPath,f'{sysname}.inp')
streamFileToCHARMM(variablesFile)
streamFileToCHARMM(MSLDInpFile)
# Extract physical frames
iframe = 0 # current overall frame across all ns trajectories
for i in range(eqS+1,ns+1):
# Define ns trajectory file name
TrajFile = os.path.join(TrajPath,f'{sysname}_prod{i}.dcd_{rep}')
# Iterate over frames until we hit a physical one
for j in range(0,nf): # j is the current ns frame index. It is iframe % nf
if iframe in physicalFrames:
# Get substituent indices per site for physical frame
subb = subs[iframe,:]
sub = '_'.join([str(x+1) for x in subb])
# Create directory for physical ligand
physSubDir = os.path.join(framesPath,sub)
if not os.path.exists(physSubDir):
os.mkdir(physSubDir)
# Define pdb file path to be created
pdbFileName = os.path.join(physSubDir,f'{iframe}.pdb')
print(iframe)
print(j)
# Open traj and read up to physical frame
lingo.charmm_script(f'''open unit 51 read unform name {TrajFile}
traj first 51 nunit 1 skip {nsavc}
''')
for ii in range(0,j+1):
lingo.charmm_script('''
traj read
''')
# Create atom selection
dontInclude = []
for site, sub in enumerate(nsubs):
dontInclude.extend([f'site{site+1}_sub{s+1}' for _,s in enumerate(range(sub)) if s != subb[site]])
dontInclude = ' .or. -\n'.join(dontInclude)
selection = f'sele .not. ({dontInclude}) end'
if NoSolv:
selection = f'sele .not. ({dontInclude}) .and. (.not. (segid IONS .or. segid SOLV .or. segid W*T*)) end'
# Write pdb file
lingo.charmm_script(f'coor orient')
lingo.charmm_script(f'''open write card unit 12 name {pdbFileName}
write coor {selection} pdb unit 12
''')
# Delete all atoms, close all units and reload PSF
psf.delete_atoms(pycharmm.SelectAtoms().all_atoms())
lingo.charmm_script('''close unit 51
close unit 12
''')
settings.set_bomb_level(-1)
read.psf_card(StructureFile, append = True) # Bypass image centering issues
settings.set_bomb_level(0)
iframe += 1
if __name__ == '__main__':
# Define variables
sysname = 'jnk1' # System name
cutoff = 0.90 # Only consider lambda >= cutoff as physical
ns = 6 # Total number of ns (including equilibration time)
eqS = 5 # Equilibration time (first `eqS` discarded from `ns`)
nsteps = 500000 # Number of steps per ns run
nsavc = 10000 # Frequency of saving frames
nsavl = 10 # Frequency of saving lambdas
nsubs = [4] # [nsubs1, nsubs2, ...] List of number of subs at each site
ini= 65 # ALF run number
dup= 'b' # Production duplicate to process
rep = 0 # Replica number (0 if no replica exchange)
StructureFile='./prep/minimized.psf' # MSLD System PSF
nf = nsteps/nsavc
inx=string.ascii_lowercase.index(dup) # zero-based index
LambdaFile=f'analysis{ini}/data/Lambda.{inx}.0.dat'
skip = int(nsavc/nsavl) # save frequency of traj wrt save freq of lambdas
# Run
subs_on, lambdasPerFrame = get_subs_on(LambdaFile, nsubs, nsavc=nsavc,\
nsavl=nsavl, cutoff=cutoff)
grab_frames(sysname, nsubs, subs_on, StructureFile,\
nsteps, nsavc, ns, eqS, ini, dup)