Skip to content

Commit

Permalink
downgraded rdkit to 2020.09.5 avoid bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Truman-Xu committed Jun 24, 2021
1 parent 1e21947 commit 72e60e3
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 40 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ __pycache__/
/conda/*
*.egg-info/*
test_hyper.param

conda-pkg/
build/
dist/
7 changes: 6 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ channels:
dependencies:
- python>=3.7
- pytorch
- rdkit>=2020.03.3.0
- rdkit=2020.09.5
## Note: The new code for Chem.MolFragmentToSmiles() will not work on the JTVAE
## in the newer versions of rdkit (starting from v2021.03.1 and hopefully ends with 03.4?)
## See issue https://github.com/rdkit/rdkit/issues/3998
## and https://github.com/chemprop/chemprop/pull/182
## The work on the patch is to be released in 2021.03.4
- scipy
- rxdock
- tmap
Expand Down
12 changes: 6 additions & 6 deletions hyper.param
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

############### Parameters for SnD #####################
receptor_name = CDK2_5IEV # name the receptor to create dir
receptor_file = /home/ziqiaoxu/SampleDock/targets/CDK2_5IEV/5IEV.mol2
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2
# must be mol2 format
ligand_file = /home/ziqiaoxu/SampleDock/targets/CDK2_5IEV/Roniciclib.sd
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd
# must be sd format
ncycle = 1600 # number of cycles to be run
ndesign = 20 # number of designs to be generated per cycle
ensemble = 1 # top of number of design to generate the average structure
ensemble = 1 # number of top designs to generate the average structure
# (1 being just the top scoring structure)
seed_smi = C1(C(NC2=CC=CC=C2)=NC=N3)=C3C=CC=C1
nseeds = 1 # number of top designs to be used as seeds for distributed generation
# nseeds overrides ensemble
seed_smi = C1=CC=CC=C1
# initial seeding SMILES for the first cycle, default to benzene

############### Parameters for rDock ###################
cavity_protocol = rbcavity -was -d -r # cmd and option for creating pocket
# -W option for rxdock precompiled bin file, -was for locally compiled rdock installation
docking_prm = dock.prm # docking protocol (-p), no solvation term by default
npose = 100 # number of poses generated (-n)
prefix = pose_docked_ # prefix of the output files from rDock
Expand Down
1 change: 0 additions & 1 deletion sampledock/SnD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from .docking import dock, sort_pose, save_pose
from .pocket_prepare import prep_prm
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
from .generator import single_generator, distributed_generator
from .post_process import mkdf, combine_designs
Expand Down
4 changes: 2 additions & 2 deletions sampledock/SnD/docking.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

def dock(ligs, dock_dir, prmfile, docking_prm, npose, prefix = 'docked'):
# ligs must be a list of file path
print('[INFO]: Docking in Progress\t', end = '\r')
print('[INFO]: Docking in Progress\t\t', end = '\r')
sys.stdout.flush()
procs = []
for i,lig in enumerate(ligs):
Expand All @@ -40,7 +40,7 @@ def sort_pose(dock_dir, sort_by, prefix = None):
if x.endswith('.sd')]

if len(poses_mols) == 0:
raise Exception('No .sd file matching the criteria in %s'%dock_dir)
raise FileNotFoundError('No .sd file matching the criteria in %s'%dock_dir)

best_poses = []
for mol_path in poses_mols:
Expand Down
16 changes: 11 additions & 5 deletions sampledock/SnD/pocket_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def wrt_prm(parameter,filename='pocket_docking.prm'):
def prep_prm(receptor,ligand,recep_name,target_dir):

receptor = os.path.abspath(receptor)
if not os.path.exists(receptor): print(receptor+'\nRECEPTOR FILE NOT EXIST!'); return None
if not os.path.exists(receptor): raise FileNotFoundError(receptor+'\nRECEPTOR FILE NOT EXIST!')

ligand = os.path.abspath(ligand)
if not os.path.exists(ligand): print(ligand+'\nLIGAND FILE NOT EXIST!'); return None
if not os.path.exists(ligand): raise FileNotFoundError(ligand+'\nLIGAND FILE NOT EXIST!')

cav_dir = os.path.abspath(target_dir)+'/cavity'
os.makedirs(cav_dir)
Expand All @@ -65,10 +65,16 @@ def prep_prm(receptor,ligand,recep_name,target_dir):

def create_cav(prmfile):
# rbcavity must be installed/loaded to execute the cmdline
cmdline = "rbcavity -was -d -r %s"%prmfile
cmdline = "rbcavity -W -d -r %s"%prmfile
proc = subprocess.Popen(cmdline, shell=True)
proc.wait()
print('Docking pocket grid created for: \n'+prmfile+'\n')
returncode = proc.wait()
if returncode == 2:
cmdline = "rbcavity -was -d -r %s"%prmfile
proc = subprocess.Popen(cmdline, shell=True)
returncode = proc.wait()
if returncode == 0:
print('Docking pocket grid created for: \n'+prmfile+'\n')
else: raise Exception('Cavity creation failed')

if __name__ == "__main__":
import argparse
Expand Down
7 changes: 2 additions & 5 deletions sampledock/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
rdBase.DisableLog('rdApp.error')
from .jtvae import Vocab, JTNNVAE
# Sample and Dock tools
from .SnD import prep_prm
from .SnD.pocket_prepare import prep_prm, create_cav
from .SnD import dock, sort_pose, save_pose
from .SnD import hyperparam_loader, create_wd, smiles_to_sdfile
from .SnD import single_generator, distributed_generator
Expand Down Expand Up @@ -61,10 +61,7 @@
prmfile, cav_dir = prep_prm(p.receptor_file,p.ligand_file,p.receptor_name,wd)

## create pocket
cmdline = p.cavity_protocol+' %s > %s/create_cavity.out'%(prmfile,cav_dir)
proc = subprocess.Popen(cmdline, shell=True)
proc.wait()
print('Docking pocket grid created')
create_cav(prmfile)

## Main loop: VAE on subsequent returned compounds
for j in range(p.ncycle):
Expand Down
15 changes: 12 additions & 3 deletions sampledock/jtvae/chemutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def copy_edit_mol(mol):

def get_clique_mol(mol, atoms):
smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
## Comment by Truman 6/23/2021
## The above code will not longer work on this perticular task
## with newer versions of rdkit (starting from v2021.03.1 and hopefully ends with 03.4?)
## See issue https://github.com/rdkit/rdkit/issues/3998
## and https://github.com/chemprop/chemprop/pull/182
## The work on the patch is to be released in 2021.03.4
new_mol = Chem.MolFromSmiles(smiles, sanitize=False)
new_mol = copy_edit_mol(new_mol).GetMol()
new_mol = sanitize(new_mol) #We assume this is not None
Expand Down Expand Up @@ -118,7 +124,9 @@ def tree_decomp(mol):
cnei = nei_list[atom]
bonds = [c for c in cnei if len(cliques[c]) == 2]
rings = [c for c in cnei if len(cliques[c]) > 4]
if len(bonds) > 2 or (len(bonds) == 2 and len(cnei) > 2): #In general, if len(cnei) >= 3, a singleton should be added, but 1 bond + 2 ring is currently not dealt with.
if len(bonds) > 2 or (len(bonds) == 2 and len(cnei) > 2):
# In general, if len(cnei) >= 3, a singleton should be added,
# but 1 bond + 2 ring is currently not dealt with.
cliques.append([atom])
c2 = len(cliques) - 1
for c1 in cnei:
Expand All @@ -134,9 +142,10 @@ def tree_decomp(mol):
c1,c2 = cnei[i],cnei[j]
inter = set(cliques[c1]) & set(cliques[c2])
if edges[(c1,c2)] < len(inter):
edges[(c1,c2)] = len(inter) #cnei[i] < cnei[j] by construction
edges[(c1,c2)] = len(inter)
#cnei[i] < cnei[j] by construction

edges = [u + (MST_MAX_WEIGHT-v,) for u,v in edges.items()] #Changed from .iteritems() to .items() by Truman for python 3.7
edges = [u + (MST_MAX_WEIGHT-v,) for u,v in edges.items()]
if len(edges) == 0:
return cliques, edges

Expand Down
13 changes: 8 additions & 5 deletions sampledock/jtvae/jtnn_vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ def __init__(self, vocab, hidden_size, latent_size, depthT, depthG):
self.A_assm = nn.Linear(latent_size, hidden_size, bias=False)
self.assm_loss = nn.CrossEntropyLoss(reduction='sum')

## nn.Linear: Applies a linear transformation to the incoming data (y = xA^T + b)
## Not sure how the biases and weights are specified this way
self.T_mean = nn.Linear(hidden_size, latent_size)
self.T_var = nn.Linear(hidden_size, latent_size)
self.G_mean = nn.Linear(hidden_size, latent_size)
Expand Down Expand Up @@ -98,7 +96,9 @@ def find_ensemble(self,smiles_list):
# This is due to difference in parsing of SMILES (especially rings)
## TODO: Convert sampledock to OOP structure and use the vectors directly
except KeyError as key:
print('[KeyError]',key,'is not part of the vocabulary (the model was not trained with this scaffold)')
print('[KeyError]',key,\
'is not part of the vocabulary \
(the model was not trained with this scaffold)')
continue
tree_mean = self.T_mean(x_tree)
tree_log_var = -torch.abs(self.T_var(x_tree))
Expand Down Expand Up @@ -223,8 +223,11 @@ def decode(self, x_tree_vecs, x_mol_vecs, prob_decode):

cur_mol = cur_mol.GetMol()
set_atommap(cur_mol)
cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
return Chem.MolToSmiles(cur_mol) if cur_mol is not None else None
try:
Chem.SanitizeMol(cur_mol)
return Chem.MolToSmiles(cur_mol)
except rdkit.Chem.rdchem.AtomKekulizeException:
return None

def dfs_assemble(self, y_tree_mess, x_mol_vecs, all_nodes, cur_mol, global_amap, fa_amap, cur_node, fa_node, prob_decode, check_aroma):
fa_nid = fa_node.nid if fa_node is not None else -1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setup(
name='sampledock',
version='0.5',
version='0.5.1',
description='Molecular design framework the merges generative AI and molecular docking',
author='Ziqiao Xu and Aaron Frank',
author_email='afrankz@umich.edu',
Expand Down
24 changes: 14 additions & 10 deletions test_hyper.param
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
## All relative path anchors on the location of working directory, not this file
## All relative path starts from the location of working directory not this file

############### Parameters for SnD #####################
receptor_name = CDK2-5IEV # name the receptor to create dir
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2 # must be mol2 format
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd # must be sd format
ncycle = 2 # number of cycles to be run
ndesign = 10 # number of designs to be generated per cycle
ensemble = 5 # top of number of design to generate the average structure
receptor_name = CDK2_5IEV # name the receptor to create dir
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2
# must be mol2 format
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd
# must be sd format
ncycle = 5 # number of cycles to be run
ndesign = 20 # number of designs to be generated per cycle
ensemble = 1 # number of top designs to generate the average structure
# (1 being just the top scoring structure)
seed_smi = C1=CC=CC=C1 # initial seeding SMILES for the first cycle, default to benzene
nseeds = 1 # number of top designs to be used as seeds for distributed generation
# nseeds overrides ensemble
seed_smi = C1=CC=CC=C1
# initial seeding SMILES for the first cycle, default to benzene

############### Parameters for rDock ###################
cavity_protocol = rbcavity -was -d -r # cmd and option for creating pocket
docking_prm = dock.prm # docking protocol (-p), no solvation term by default
npose = 10 # number of poses generated (-n)
npose = 20 # number of poses generated (-n)
prefix = pose_docked_ # prefix of the output files from rDock
sort_by = SCORE.INTER # filter for sorting the designs

Expand Down

0 comments on commit 72e60e3

Please sign in to comment.