-
Notifications
You must be signed in to change notification settings - Fork 0
/
mk_prepare_ligand_v0.6.0a2.py
377 lines (335 loc) · 18.3 KB
/
mk_prepare_ligand_v0.6.0a2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
#
import argparse
import os
import sys
import json
import warnings
from rdkit import Chem
from meeko import MoleculePreparation
from meeko import rdkitutils
from meeko import PDBQTWriterLegacy
try:
import prody
from meeko import CovalentBuilder
_prody_parsers = {'pdb': prody.parsePDB, 'mmcif': prody.parseMMCIF}
warnings.warn("Prody not available, covalent docking won't work", ImportWarning)
except:
_has_prody = False
_prody_parsers = {}
else:
_has_prody = True
try:
from meeko import obutils # fails if openbabel not available
except:
_has_openbabel = False
else:
_has_openbabel = True
def cmd_lineparser():
backend = 'rdkit'
if '--ob_backend' in sys.argv:
if not _has_openbabel:
raise ImportError('--ob_backend requires openbabel which is not available')
backend = 'ob'
sys.argv.remove('--ob_backend')
conf_parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False)
conf_parser.add_argument('-c', '--config_file',
help='configure MoleculePreparation from JSON file. Overriden by command line args.')
confargs, remaining_argv = conf_parser.parse_known_args()
config = MoleculePreparation.get_defaults_dict()
if confargs.config_file is not None:
with open(confargs.config_file) as f:
c = json.load(f)
config.update(c)
parser = argparse.ArgumentParser()#parents=[conf_parser]) # parents shows --config_file in help msg
parser.set_defaults(**config)
parser.add_argument("-v", "--verbose", dest="verbose",
action="store_true", help="print information about molecule setup")
io_group = parser.add_argument_group("Input/Output")
io_group.add_argument("-i", "--mol", dest="input_molecule_filename", required=True,
action="store", help="molecule file (MOL2, SDF,...)")
io_group.add_argument("-o", "--out", dest="output_pdbqt_filename",
action="store", help="output pdbqt filename. Single molecule input only.")
io_group.add_argument("--multimol_outdir", dest="multimol_output_dir",
action="store", help="folder to write output pdbqt for multi-mol inputs. Incompatible with -o/--out and -/--.")
io_group.add_argument("--multimol_prefix", dest="multimol_prefix",
action="store", help="replace internal molecule name in multi-molecule input by specified prefix. Incompatible with -o/--out and -/--.")
io_group.add_argument('-', '--', dest='redirect_stdout', action='store_true',
help='do not write file, redirect output to STDOUT. Argument -o/--out is ignored. Single molecule input only.')
config_group = parser.add_argument_group("Molecule preparation")
config_group.add_argument('-c', '--config_file',
help='configure MoleculePreparation from JSON file. Overriden by command line args.') # parsed above by conf_parser, here for help msg
config_group.add_argument("--rigid_macrocycles",dest="rigid_macrocycles",
action="store_true", help="keep macrocycles rigid in input conformation")
config_group.add_argument("--keep_chorded_rings",dest="keep_chorded_rings",
action="store_true", help="return all rings from exhaustive perception")
config_group.add_argument("--keep_equivalent_rings",dest="keep_equivalent_rings",
action="store_true", help="equivalent rings have the same size and neighbors")
config_group.add_argument("--min_ring_size", dest="min_ring_size",
type=int, help="min nr of atoms in ring for opening")
config_group.add_argument("-w", "--hydrate", dest="hydrate",
action="store_true", help="add water molecules for hydrated docking")
config_group.add_argument("--merge_these_atom_types", dest="merge_these_atom_types", nargs="*",
help="list of atom types to merge, default is \"H\"", default=["H"])
config_group.add_argument("-r", "--rigidify_bonds_smarts", dest="rigidify_bonds_smarts",
action="append", help="SMARTS patterns to rigidify bonds",
metavar='SMARTS')
config_group.add_argument("-b", "--rigidify_bonds_indices", dest="rigidify_bonds_indices",
action="append", help="indices of two atoms (in the SMARTS) that define a bond (start at 1)",
nargs='+', type=int, metavar='i j')
config_group.add_argument("-a", "--flexible_amides", dest="flexible_amides",
action="store_true", help="allow amide bonds to rotate and be non-planar, which is bad")
config_group.add_argument("-p", "--atom_type_smarts", dest="atom_type_smarts_json",
action="store", help="SMARTS based atom typing (JSON format)")
config_group.add_argument("-aa", "--add_atom_types", dest="add_atom_types_json",
action="append", help="Additional atom types to assign (JSON formated)", metavar="[{'smarts': '<smarts pattern>', 'atype': ',atomtype name>'}, {'smarts': '<smarts pattern>', 'atype': ',atomtype name>'}]")
config_group.add_argument("--double_bond_penalty", help="penalty > 100 prevents breaking double bonds", type=int)
config_group.add_argument("--bad_charge_ok", help="NaN and Inf charges allowed in PDBQT", action="store_true")
config_group.add_argument("--add_index_map", dest="add_index_map",
action="store_true", help="write map of atom indices from input to pdbqt")
config_group.add_argument("--remove_smiles", dest="remove_smiles",
action="store_true", help="do not write smiles as remark to pdbqt")
reactive_group = parser.add_argument_group("Reactive docking")
reactive_group.add_argument('--reactive_smarts', help="SMARTS pattern for reactive group")
reactive_group.add_argument('--reactive_smarts_idx', help='index (1-based) of the reactive atom in --reactive_smarts', type=int)
need_prody_msg = ''
if not _has_prody: need_prody_msg = ". Needs Prody which is unavailable"
covalent_group = parser.add_argument_group("Covalent docking (tethered)%s" % (need_prody_msg))
covalent_group.add_argument('--receptor', help='receptor filename. Supported formats: [%s]%s' % (
'/'.join(list(_prody_parsers.keys())),
need_prody_msg))
covalent_group.add_argument('--rec_residue',
help='examples: "A:LYS:204", "A:HIS:", ":LYS:"')
covalent_group.add_argument('--tether_smarts',
help='SMARTS pattern to define ligand atoms for receptor attachment')
covalent_group.add_argument('--tether_smarts_indices', type=int, nargs=2, required=False,
metavar='IDX', default=[1, 2],
help='indices (1-based) of the SMARTS atoms that will be attached (default: 1 2)')
args = parser.parse_args(remaining_argv)
# check reactive arguments
if (args.reactive_smarts is None) != (args.reactive_smarts_idx is None):
print("Arguments --reactive_smarts and --reactive_smarts_idx require each other", file=sys.stderr)
sys.exit(2)
elif args.reactive_smarts_idx is not None:
if args.reactive_smarts_idx < 1:
print("--reactive_smarts_idx is 1-indexed, but got %d" % args.reactive_smarts_idx, file=sys.stderr)
sys.exit(2)
args.reactive_smarts_idx -= 1 # convert from 1- to 0-index
# command line arguments override config
for key in config:
if key in args.__dict__:
config[key] = args.__dict__[key]
if args.atom_type_smarts_json is not None:
with open(args.atom_type_smarts_json) as f:
config['atom_type_smarts'] = json.load(f)
if args.add_atom_types_json is not None:
additional_ats = []
for at in args.add_atom_types_json:
at = json.loads(at)
if type(at) == dict:
additional_ats.append(at)
elif type(at) == list:
additional_ats.extend(at)
config['add_atom_types'] = additional_ats
if args.multimol_output_dir is not None or args.multimol_prefix is not None:
if args.output_pdbqt_filename is not None:
print("Warning: -o/--out ignored with --multimol_outdir or --multimol_prefix", file=sys.stderr)
if args.redirect_stdout:
print("Warning: -/-- ignored with --multimol_outdir or --multimol_prefix", file=sys.stderr)
# verify sanity of covalent docking input
num_required_covalent_args = 0
num_required_covalent_args += int(args.receptor is not None)
num_required_covalent_args += int(args.rec_residue is not None)
num_required_covalent_args += int(args.tether_smarts is not None)
if num_required_covalent_args not in [0, 3]:
print("Error: --receptor, --rec_residue, and --tether_smarts are all required for covalent docking.")
sys.exit(2)
is_covalent = num_required_covalent_args == 3
if is_covalent and not _has_prody:
raise ImportError("Covalent docking requires Prody which is not available")
if min(args.tether_smarts_indices) < 1:
print("--tether_smarts_indices is 1-indexed, all values must be greater than zero", file=sys.stderr)
sys.exit(2)
args.tether_smarts_indices = [i-1 for i in args.tether_smarts_indices] # convert to 0-index
# verify sanity of SMARTS patterns to make bonds rigid and convert to 0-based indices
rigidify_bonds_smarts = config['rigidify_bonds_smarts']
rigidify_bonds_indices = config['rigidify_bonds_indices']
if len(rigidify_bonds_indices) != len(rigidify_bonds_smarts):
raise RuntimeError('length of --rigidify_bonds_indices differs from length of --rigidify_bonds_smarts')
for indices in rigidify_bonds_indices:
if len(indices) != 2:
raise RuntimeError('--rigidify_bonds_indices must specify pairs, e.g. -b 1 2 -b 3 4')
indices[0] = indices[0] - 1 # convert from 1- to 0-index
indices[1] = indices[1] - 1
return args, config, backend, is_covalent
class Output:
def __init__(self, multimol_output_dir, multimol_prefix, redirect_stdout, output_filename):
is_multimol = (multimol_prefix is not None) or (multimol_output_dir is not None)
self._mkdir(multimol_output_dir)
if multimol_output_dir is None:
multimol_output_dir = '.'
self.multimol_output_dir = multimol_output_dir
self.multimol_prefix = multimol_prefix
self.redirect_stdout = redirect_stdout
self.output_filename = output_filename
self.is_multimol = is_multimol
self.visited_filenames = set()
self.duplicate_filenames = set()
self.visited_names = set()
self.duplicate_names = set()
self.num_files_written = 0
self.counter = 0
def __call__(self, pdbqt_string, name, suffixes=()):
self.counter += 1
if name in self.visited_names:
self.duplicate_names.add(name)
self.visited_names.add(name)
if self.multimol_prefix is not None:
name = '%s-%d' % (args.multimol_prefix, self.counter)
for suffix in suffixes:
if suffix is not None and len(suffix) > 0:
name += "_" + suffix
if self.is_multimol:
if name in self.visited_filenames:
self.duplicate_filenames.add(name)
repeat_id = 1
newname = name + "-again%d" % repeat_id
while newname in self.visited_filenames:
repeat_id += 1
newname = name + "-again%d" % repeat_id
print("Renaming %s to %s to disambiguate filename" % (name, newname), file=sys.stderr)
name = newname
self.visited_filenames.add(name)
fpath = os.path.join(self.multimol_output_dir, name + '.pdbqt')
print(pdbqt_string, end='', file=open(fpath, 'w'))
self.num_files_written += 1
elif self.redirect_stdout:
print(pdbqt_string, end='')
else:
if self.output_filename is None:
filename = '%s.pdbqt' % name
else:
filename = self.output_filename
print(pdbqt_string, end='', file=open(filename, 'w'))
self.num_files_written += 1
def _mkdir(self, multimol_output_dir):
"""make directory if it doesn't exist yet """
if multimol_output_dir is not None:
if not os.path.exists(multimol_output_dir):
os.mkdir(multimol_output_dir)
def get_duplicates_info_string(self):
if not self.is_multimol:
return None
if len(self.duplicate_names):
# with multimol_prefix with can have duplicate molecule names,
# but not duplicate filenames. This warning is for such cases.
string = "Warning: %d molecules have duplicated names" % len(self.duplicate_names)
else:
string = "No duplicate molecule molecule names were found"
# if we have duplicate_filenames, we also have duplicate molecule names,
# but it suffices to return the string below
if len(self.duplicate_filenames):
string = "Warning: %d molecules with repeated names were suffixed with -again<n>" % len(self.duplicate_filenames)
return string
@staticmethod
def get_suffixes(molsetups):
if len(molsetups) == 1:
return ("",) # no suffix needed
else:
return tuple("mk%d" % (i + 1) for i in range(len(molsetups)))
if __name__ == '__main__':
args, config, backend, is_covalent = cmd_lineparser()
input_molecule_filename = args.input_molecule_filename
# read input
input_fname, ext = os.path.splitext(input_molecule_filename)
ext = ext[1:].lower()
if backend == 'rdkit':
parsers = {'sdf': Chem.SDMolSupplier, 'mol2': rdkitutils.Mol2MolSupplier, 'mol': Chem.SDMolSupplier}
if not ext in parsers:
print("*ERROR* Format [%s] not in supported formats [%s]" % (ext, '/'.join(list(parsers.keys()))))
sys.exit(1)
mol_supplier = parsers[ext](input_molecule_filename, removeHs=False) # input must have explicit H
elif backend == 'ob':
print("Using openbabel instead of rdkit")
mol_supplier = obutils.OBMolSupplier(input_molecule_filename, ext)
# configure output writer
if args.output_pdbqt_filename is None:
output_filename = input_fname + '.pdbqt'
else:
output_filename = args.output_pdbqt_filename
output = Output(
args.multimol_output_dir,
args.multimol_prefix,
args.redirect_stdout,
output_filename,
)
# initialize covalent object for receptor
if is_covalent:
rec_filename = args.receptor
_, rec_extension = os.path.splitext(rec_filename)
rec_extension = rec_extension[1:].lower()
prody_parser = _prody_parsers[rec_extension]
rec_prody_mol = prody_parser(rec_filename)
covalent_builder = CovalentBuilder(rec_prody_mol, args.rec_residue)
input_mol_counter = 0
input_mol_skipped = 0
input_mol_with_failure = 0 # if reactive or covalent, each mol can yield multiple PDBQT
nr_failures = 0
is_after_first = False
preparator = MoleculePreparation.from_config(config)
for mol in mol_supplier:
if is_after_first and output.is_multimol == False:
print("Processed only the first molecule of multiple molecule input.")
print("Use --multimol_prefix and/or --multimol_outdir to process all molecules in %s." % (
input_molecule_filename))
break
is_after_first = True
# check that molecule was successfully loaded
if backend == 'rdkit':
is_valid = mol is not None
elif backend == 'ob':
is_valid = mol.NumAtoms() > 0
input_mol_skipped += int(is_valid==False)
if not is_valid: continue
this_mol_had_failure = False
if is_covalent:
for cov_lig in covalent_builder.process(mol, args.tether_smarts, args.tether_smarts_indices):
root_atom_index = cov_lig.indices[0]
molsetups = preparator.prepare(cov_lig.mol, root_atom_index=root_atom_index, not_terminal_atoms=[root_atom_index])
res, chain, num = cov_lig.res_id
suffixes = output.get_suffixes(molsetups)
for molsetup, suffix in zip(molsetups, suffixes):
pdbqt_string, success, error_msg = PDBQTWriterLegacy.write_string(molsetup, bad_charge_ok=args.bad_charge_ok)
if success:
pdbqt_string = PDBQTWriterLegacy.adapt_pdbqt_for_autodock4_flexres(pdbqt_string, res, chain, num)
name = molsetup.name
output(pdbqt_string, name, (cov_lig.label, suffix))
else:
nr_failures += 1
this_mol_had_failure = True
print(error_msg, file=sys.stderr)
else:
molsetups = preparator.prepare(mol)
suffixes = output.get_suffixes(molsetups)
for molsetup, suffix in zip(molsetups, suffixes):
pdbqt_string, success, error_msg = PDBQTWriterLegacy.write_string(molsetup, bad_charge_ok=args.bad_charge_ok)
title = mol.GetProp('_Name')
pdbqt_string = 'REMARK Name = '+title+'\n'+pdbqt_string
if success:
name = molsetup.name
output(pdbqt_string, name, (suffix,))
if args.verbose:
molsetup.show()
else:
nr_failures += 1
this_mol_had_failure = True
print(error_msg, file=sys.stderr)
input_mol_with_failure += int(this_mol_had_failure)
if output.is_multimol:
print("Input molecules processed: %d, skipped: %d" % (output.counter, input_mol_skipped))
print("PDBQT files written: %d" % (output.num_files_written))
print("PDBQT files not written due to error: %d" % (nr_failures))
print("Input molecules with errors: %d" % (input_mol_with_failure))
print(output.get_duplicates_info_string())