Skip to content

Commit

Permalink
Remove openbabel dependency (#518)
Browse files Browse the repository at this point in the history
* Add tests for invalid SMILES
  • Loading branch information
danielhollas authored Sep 27, 2023
1 parent a1b5134 commit ba23f42
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 59 deletions.
77 changes: 22 additions & 55 deletions aiidalab_widgets_base/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,13 +685,6 @@ class SmilesWidget(ipw.VBox):

def __init__(self, title=""):
self.title = title

try:
from openbabel import openbabel # noqa: F401
from openbabel import pybel # noqa: F401
except ImportError:
self.disable_openbabel = True

try: # noqa: TC101
from rdkit import Chem # noqa: F401
from rdkit.Chem import AllChem # noqa: F401
Expand Down Expand Up @@ -735,30 +728,6 @@ def _make_ase(self, species, positions, smiles):

return atoms

def _pybel_opt(self, smiles, steps):
"""Optimize a molecule using force field and pybel (needed for complex SMILES)."""
from openbabel import openbabel as ob
from openbabel import pybel as pb

obconversion = ob.OBConversion()
obconversion.SetInFormat("smi")
obmol = ob.OBMol()
obconversion.ReadString(obmol, smiles)

pbmol = pb.Molecule(obmol)
pbmol.make3D(forcefield="uff", steps=50)

pbmol.localopt(forcefield="gaff", steps=200)
pbmol.localopt(forcefield="mmff94", steps=100)

f_f = pb._forcefields["uff"]
f_f.Setup(pbmol.OBMol)
f_f.ConjugateGradients(steps, 1.0e-9)
f_f.GetCoordinates(pbmol.OBMol)
species = [ase.data.chemical_symbols[atm.atomicnum] for atm in pbmol.atoms]
positions = np.asarray([atm.coords for atm in pbmol.atoms])
return self._make_ase(species, positions, smiles)

def _rdkit_opt(self, smiles, steps):
"""Optimize a molecule using force field and rdkit (needed for complex SMILES)."""
from rdkit import Chem
Expand Down Expand Up @@ -795,28 +764,20 @@ def _rdkit_opt(self, smiles, steps):
return self._make_ase(species, positions, smiles)

def _mol_from_smiles(self, smiles, steps=1000):
"""Convert SMILES to ase structure try rdkit then pybel"""

# Canonicalize the SMILES code
# https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system#Terminology
canonical_smiles = self.canonicalize_smiles(smiles)
if not canonical_smiles:
return None

if canonical_smiles != smiles:
self.output.value = f"Canonical SMILES: {canonical_smiles}"

"""Convert SMILES to ASE structure using RDKit"""
try:
return self._rdkit_opt(canonical_smiles, steps)
canonical_smiles = self.canonicalize_smiles(smiles)
ase = self._rdkit_opt(canonical_smiles, steps)
except ValueError as e:
self.output.value = str(e)
if self.disable_openbabel:
return None
self.output.value += " Trying OpenBabel..."
return self._pybel_opt(smiles, steps)
return None
else:
if canonical_smiles != smiles:
self.output.value = f"Canonical SMILES: {canonical_smiles}"
return ase

def _on_button_pressed(self, change=None):
"""Convert SMILES to ase structure when button is pressed."""
"""Convert SMILES to ASE structure when button is pressed."""
self.output.value = ""

if not self.smiles.value:
Expand All @@ -829,19 +790,25 @@ def _on_button_pressed(self, change=None):
if self.output.value == spinner:
self.output.value = ""

def canonicalize_smiles(self, smiles):
# https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system#Terminology
@staticmethod
def canonicalize_smiles(smiles: str) -> str:
"""Canonicalize the SMILES code.
:raises ValueError: if SMILES is invalid or if canonicalization fails
"""
from rdkit import Chem

mol = Chem.MolFromSmiles(smiles, sanitize=True)
if mol is None:
# Something is seriously wrong with the SMILES code,
# just return None and don't attempt anything else.
self.output.value = "RDkit ERROR: Invalid SMILES string"
return None
# Something is seriously wrong with the SMILES code
msg = "Invalid SMILES string"
raise ValueError(msg)

canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
if not canonical_smiles:
self.output.value = "RDkit ERROR: Could not canonicalize SMILES"
return None
msg = "SMILES canonicalization failed"
raise ValueError(msg)
return canonical_smiles

@tl.default("structure")
Expand Down
19 changes: 15 additions & 4 deletions tests/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,21 +148,32 @@ def test_smiles_widget():
assert isinstance(widget.structure, ase.Atoms)
assert widget.structure.get_chemical_formula() == "N2"

# Should not raise for invalid smiles
widget.smiles.value = "invalid"
widget._on_button_pressed()
assert widget.structure is None


@pytest.mark.usefixtures("aiida_profile_clean")
def test_smiles_canonicalization():
"""Test the SMILES canonicalization via RdKit."""
widget = awb.SmilesWidget()
canonicalize = awb.SmilesWidget.canonicalize_smiles

# Should not change canonical smiles
assert widget.canonicalize_smiles("C") == "C"
assert canonicalize("C") == "C"

# Should canonicalize this
canonical = widget.canonicalize_smiles("O=CC=C")
canonical = canonicalize("O=CC=C")
assert canonical == "C=CC=O"

# Should be idempotent
assert canonical == widget.canonicalize_smiles(canonical)
assert canonical == canonicalize(canonical)

# Should raise for invalid smiles
with pytest.raises(ValueError):
canonicalize("invalid")
# There is another failure mode when RDkit mol object is generated
# but the canonicalization fails. I do not know how to trigger it though.


@pytest.mark.usefixtures("aiida_profile_clean")
Expand Down

0 comments on commit ba23f42

Please sign in to comment.