forked from dkoes/rdkit-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractmatches.py
executable file
·107 lines (88 loc) · 3.15 KB
/
extractmatches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/python
#given a smarts rxn file (backwards), a core scaffold smarts file (with name containing connecting atoms)
# an sdf file with the results of clustering scaffolds and an input sdf file and an
#output prefix, output the extracted reactant conformations aligned to their scaffold
import sys,gzip,argparse
from rdkit.Chem import AllChem
def subMol(mol, match):
#not sure why this functionality isn't implemented natively
#but get the interconnected bonds for the match
atoms = set(match)
bonds = set()
for a in atoms:
atom = mol.GetAtomWithIdx(a)
for b in atom.GetBonds():
if b.GetOtherAtomIdx(a) in atoms:
bonds.add(b.GetIdx())
return AllChem.PathToSubmol(mol,list(bonds))
#return index of closest scaffold in scaffold to mol
def closestScaffold(scaffolds, pattern, core, mol):
ret = -1
match = mol.GetSubstructMatch(pattern)
if match:
sub = subMol(mol, match)
cmatch = sub.GetSubstructMatch(core)
if cmatch:
min = float('inf')
for (i,(s,smatch)) in enumerate(scaffolds):
r = AllChem.GetBestRMS(s, sub, maps=[zip(cmatch,smatch)])
if r < min:
min = r
ret = i
mmatch = mol.GetSubstructMatch(core)
AllChem.GetBestRMS(s,mol,maps=[zip(mmatch,smatch)])
return ret
#MAIN
parser = argparse.ArgumentParser()
parser.add_argument('-r','--rxn', help="Reaction file")
parser.add_argument('-c','--core',help="Core scaffold with connecting atoms in name")
parser.add_argument('-i','--input',help="Input conformers")
parser.add_argument('-s','--scaffolds',help="Scaffold conformers")
parser.add_argument('-o','--output',help="Output prefix")
args = parser.parse_args()
rxnf = open(args.rxn)
rxnsm = rxnf.readline().split()[0] #ignore any name
rxn = AllChem.ReactionFromSmarts(rxnsm)
rxn.Initialize()
if rxn.GetNumReactantTemplates() != 1:
print "Need backwards reaction"
sys.exit(-1)
coref = open(args.core)
corel = coref.readline()
coreconnects = corel.split()[1:]
core = AllChem.MolFromSmarts(corel.split()[0])
inscaffolds = AllChem.SDMolSupplier(args.scaffolds,False)
if inscaffolds is None:
print "Could not open ",args.scaffolds
sys.exit(-1)
inmols = AllChem.SDMolSupplier(args.input)
if inmols is None:
print "Could not open ",args.input
sys.exit(-1)
smart = AllChem.MolToSmarts(rxn.GetReactantTemplate(0))
pattern = AllChem.MolFromSmarts(smart)
#read in scaffolds
scaffolds = list()
for mol in inscaffolds:
#compute match of core
cmatch = mol.GetSubstructMatch(core)
scaffolds.append((mol,cmatch))
#setup output file, one for each reactant product
outputs = list()
for i in xrange(rxn.GetNumProductTemplates()):
outputs.append(list())
for j in xrange(len(scaffolds)):
sdwriter = AllChem.SDWriter("%s_%d_%d.sdf" % (args.output,i,j))
outputs[i].append(sdwriter)
for mol in inmols:
#for each mol, decompose it into its reactants
mol = AllChem.AddHs(mol)
#figure out which scaffold conformation is closest
c = closestScaffold(scaffolds, pattern, core, mol)
prods = rxn.RunReactants([mol])
if c >= 0:
for p in prods: #there may be multiple possible products
for (i,react) in enumerate(p):
react = AllChem.RemoveHs(react)
react.SetProp('_Name',AllChem.MolToSmiles(react))
outputs[i][c].write(react)