-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchemistry_helpers.py
117 lines (101 loc) · 3.83 KB
/
chemistry_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import logging
from pathlib import Path
# from mordred import Calculator, descriptors
from rdkit import Chem
from rdkit.Chem import Descriptors, rdFingerprintGenerator
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.MolStandardize import rdMolStandardize
def molecule_svg(smiles: str, highlight: str | None, width: int = 250) -> str:
    """Render a molecule as a square SVG, optionally highlighting a substructure.

    Args:
        smiles: SMILES string of the molecule to draw.
        highlight: SMILES of a substructure whose matching atoms should be
            highlighted, or ``None`` to draw the molecule without highlighting.
            When it contains ``"[H]"`` the molecule is drawn with explicit
            hydrogens so that those atoms can take part in the match.
        width: Edge length passed to ``MolDraw2DSVG`` for both dimensions.

    Returns:
        The drawing text produced by the SVG drawer.

    Raises:
        ValueError: If ``smiles`` cannot be parsed into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail with a
        # clear message instead of an opaque argument error from the drawer.
        raise ValueError(f"Could not parse SMILES: {smiles!r}")

    d2d = rdMolDraw2D.MolDraw2DSVG(width, width)

    # None means "draw without highlighting"; an empty match still goes
    # through the highlighting branch, as before.
    highlight_atoms = None
    if highlight is not None:
        explicit_h = "[H]" in highlight
        params = Chem.SmilesParserParams()
        # Keep hydrogens in the query only when the caller wrote them explicitly.
        params.removeHs = not explicit_h
        query = Chem.MolFromSmiles(highlight, params)
        if query is None:
            # Best effort: an unparseable highlight should not prevent drawing.
            logging.warning(
                "Could not parse highlight SMILES %r; drawing without highlight",
                highlight,
            )
        else:
            if explicit_h:
                # Explicit-H queries can only match if the target has explicit Hs.
                mol = Chem.AddHs(mol)
            highlight_atoms = mol.GetSubstructMatch(query)

    if highlight_atoms is not None:
        draw_options = d2d.drawOptions()
        draw_options.setHighlightColour((0.1, 0.9, 0.9, 0.8))
        d2d.DrawMolecule(mol, highlightAtoms=highlight_atoms)
    else:
        d2d.DrawMolecule(mol)

    d2d.FinishDrawing()
    return d2d.GetDrawingText()
def fingerprint(mol):
    """Return the fingerprint of ``mol`` from the module-level generator ``fpgen``."""
    generated = fpgen.GetFingerprint(mol)
    return generated
# # from https://github.com/mordred-descriptor/mordred/tree/develop/examples
# def get_mol_descriptors_mordred(mol):
# return calc(mol).drop_missing().asdict()
# from https://greglandrum.github.io/rdkit-blog/posts/2022-12-23-descriptor-tutorial.html
def get_mol_descriptors_rdkit(mol):
    """Calculate the full list of RDKit descriptors for a molecule.

    Every ``(name, function)`` pair in ``Descriptors._descList`` is evaluated
    on ``mol``. A descriptor function that raises does not abort the run: the
    failure is logged with its traceback and the value is recorded as ``None``.

    Args:
        mol: The molecule handed to each descriptor function.

    Returns:
        A dict mapping each descriptor name to its value, or to ``None`` where
        the descriptor function raised.
    """
    res = {}
    for nm, fn in Descriptors._descList:
        try:
            res[nm] = fn(mol)
        except Exception:
            # Only trap ordinary errors: a bare ``except`` would also swallow
            # KeyboardInterrupt/SystemExit and keep looping.
            logging.exception("Descriptor %s failed", nm)
            res[nm] = None
    return res
def standardize(mol):
    """Run ``mol`` through the standardization pipeline and return the result.

    The steps are, in order: ``rdMolStandardize.Cleanup``,
    ``rdMolStandardize.FragmentParent``, and the module-level ``uncharger``.
    """
    cleaned = rdMolStandardize.Cleanup(mol)
    parent = rdMolStandardize.FragmentParent(cleaned)
    return uncharger.uncharge(parent)
def process_smiles(inp):
    """Parse, standardize and featurize a single ``(id, SMILES)`` record.

    Args:
        inp: A two-item iterable ``(nid, smiles)``.

    Returns:
        ``None`` if the input is malformed, the SMILES cannot be parsed, or any
        step raises (the failure is logged). Otherwise a tuple of, in order:
        ``nid``, the input ``smiles``, the standardized mol, its SMILES, InChI,
        InChIKey, molecular formula, mol block, similarity fingerprint,
        pattern fingerprint, RDKit descriptor dict, the binary form of the
        standardized mol with explicit hydrogens, that mol's similarity and
        pattern fingerprints, and the SMILES, InChI and InChIKey after
        stereochemistry has been removed.
    """
    # Placeholder used in the log message if unpacking ``inp`` itself fails.
    smiles = "Input to process_smiles is invalid"
    try:
        nid, smiles = inp
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles returns None on a parse failure; report it directly
            # rather than relying on standardize() raising on None.
            logging.error("Failed to process: %s", smiles)
            return None

        smol = standardize(mol)
        if smol is None:
            return None

        smiles_clean = Chem.MolToSmiles(smol)
        inchi_clean = Chem.inchi.MolToInchi(smol)
        inchikey_clean = Chem.inchi.MolToInchiKey(smol)
        formula_clean = Chem.rdMolDescriptors.CalcMolFormula(smol)
        mol_block = Chem.MolToMolBlock(smol)
        sim_fp = fingerprint(smol)
        sub_fp = Chem.PatternFingerprint(smol)
        # desc_mordred = get_mol_descriptors_mordred(smol)
        desc_rdkit = get_mol_descriptors_rdkit(smol)

        # Explicit-hydrogen variant; must be built before stereo is stripped.
        smol_h = Chem.AddHs(smol)
        sim_fp_h = fingerprint(smol_h)
        sub_fp_h = Chem.PatternFingerprint(smol_h)

        # RemoveStereochemistry mutates ``smol`` in place, so the mol object
        # returned below is the stereo-stripped one, while smiles_clean,
        # the InChI values and mol_block above were generated beforehand.
        # NOTE(review): confirm callers expect the returned mol without stereo.
        Chem.RemoveStereochemistry(smol)
        smiles_no_stereo = Chem.MolToSmiles(smol)
        inchi_no_stereo = Chem.inchi.MolToInchi(smol)
        inchikey_no_stereo = Chem.inchi.MolToInchiKey(smol)

        return (
            nid,
            smiles,
            smol,
            smiles_clean,
            inchi_clean,
            inchikey_clean,
            formula_clean,
            mol_block,
            sim_fp,
            sub_fp,
            # desc_mordred,
            desc_rdkit,
            smol_h.ToBinary(),
            sim_fp_h,
            sub_fp_h,
            smiles_no_stereo,
            inchi_no_stereo,
            inchikey_no_stereo,
        )
    except Exception:
        # Catch ordinary errors only, so KeyboardInterrupt/SystemExit still
        # stop a batch run; logging.exception keeps the traceback.
        logging.exception("Failed to process: %s", smiles)
        return None
# calc = Calculator(descriptors)
# Shared Uncharger instance applied as the last step of standardize().
uncharger = rdMolStandardize.Uncharger()
# Shared Morgan fingerprint generator (radius 2, 2048 bits) used by fingerprint().
fpgen = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048, radius=2)