Skip to content

Commit

Permalink
Allow custom target bond definitions (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonBoothroyd authored Jul 29, 2021
1 parent 1c1d15c commit dd00ab8
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 44 deletions.
122 changes: 88 additions & 34 deletions openff/fragmenter/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,43 +290,70 @@ def _find_functional_groups(
return found_groups

@classmethod
def _find_rotatable_bonds(cls, molecule: Molecule) -> List[BondTuple]:
def find_rotatable_bonds(
cls, molecule: Molecule, target_bond_smarts: Optional[List[str]]
) -> List[BondTuple]:
"""Finds the rotatable bonds in a molecule *including* rotatable double
bonds.
Notes
-----
* This does not find terminal rotatable bonds such as -OH, -NH2 -CH3.
Todos
-----
* Add the option to build fragments around terminal torsions (-OH, -NH2, -CH3)
Parameters
----------
molecule
The molecule to search for rotatable bonds.
target_bond_smarts
An optional list of SMARTS patterns that should be used to identify the bonds
within the parent molecule to grow fragments around. Each SMARTS pattern
should include **two** indexed atoms that correspond to the two atoms
involved in the central bond.
If no pattern is provided fragments will be constructed around all 'rotatable
bonds'. A 'rotatable bond' here means any bond matched by a
`[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the added
constraint that the **heavy** degree (i.e. the degree excluding hydrogen) of
both atoms in the bond must be >= 2.
Returns
-------
A list of the rotatable map indices of the atoms which form the rotatable
A list of the **map** indices of the atoms that form the rotatable
bonds, ``[(m1, m2),...]``.
"""

matches = molecule.chemical_environment_matches(
"[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
)
if target_bond_smarts is None:

matches = molecule.chemical_environment_matches(
"[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
)

else:

matches = [
match
for smarts in target_bond_smarts
for match in molecule.chemical_environment_matches(smarts)
]

if not all(len(match) == 2 for match in matches):

raise ValueError(
f"The `target_bond_smarts` pattern ({target_bond_smarts}) "
f"must define exactly two indexed atoms to match."
)

unique_matches = {tuple(sorted(match)) for match in matches}

# Drop bonds without a heavy degree of at least 2 on each end to avoid finding
# terminal bonds
def heavy_degree(atom_index: int) -> int:
atom = molecule.atoms[atom_index]
return sum(1 for atom in atom.bonded_atoms if atom.atomic_number != 1)
if target_bond_smarts is None:

unique_matches = {
match for match in unique_matches if all(heavy_degree(i) > 1 for i in match)
}
# Drop bonds without a heavy degree of at least 2 on each end to avoid
# finding terminal bonds
def heavy_degree(atom_index: int) -> int:
atom = molecule.atoms[atom_index]
return sum(1 for atom in atom.bonded_atoms if atom.atomic_number != 1)

unique_matches = {
match
for match in unique_matches
if all(heavy_degree(i) > 1 for i in match)
}

return [
(
Expand Down Expand Up @@ -815,13 +842,20 @@ def _prepare_molecule(
return molecule, stereo, found_functional_groups, found_ring_systems

@abc.abstractmethod
def _fragment(self, molecule: Molecule) -> FragmentationResult:
def _fragment(
self,
molecule: Molecule,
target_bond_smarts: Optional[List[str]],
) -> FragmentationResult:
"""The internal implementation of ``fragment``.
Parameters
----------
molecule
The molecule to fragment.
target_bond_smarts
An optional SMARTS pattern that should be used to identify the bonds within
the parent molecule to grow fragments around.
Returns
-------
Expand All @@ -834,6 +868,7 @@ def _fragment(self, molecule: Molecule) -> FragmentationResult:
def fragment(
self,
molecule: Molecule,
target_bond_smarts: Optional[List[str]] = None,
toolkit_registry: Optional[Union[ToolkitRegistry, ToolkitWrapper]] = None,
) -> FragmentationResult:
"""Fragments a molecule according to this class' settings.
Expand All @@ -847,6 +882,18 @@ def fragment(
----------
molecule
The molecule to fragment.
target_bond_smarts
An optional list of SMARTS patterns that should be used to identify the bonds
within the parent molecule to grow fragments around. Each SMARTS pattern
should include **two** indexed atoms that correspond to the two atoms
involved in the central bond.
If no pattern is provided fragments will be constructed around all 'rotatable
bonds'. A 'rotatable bond' here means any bond matched by a
`[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the added
constraint that the **heavy** degree (i.e. the degree excluding hydrogen) of
both atoms in the bond must be >= 2. Note this will not find terminal
rotatable bonds such as -OH, -NH2 -CH3.
toolkit_registry
The underlying cheminformatics toolkits to use for things like conformer
generation, WBO computation etc. If no value is provided, the current
Expand All @@ -864,13 +911,19 @@ def fragment(

with global_toolkit_registry(toolkit_registry):

result = self._fragment(molecule)
result = self._fragment(molecule, target_bond_smarts)

result.provenance["toolkits"] = [
(toolkit.__class__.__name__, toolkit.toolkit_version)
for toolkit in GLOBAL_TOOLKIT_REGISTRY.registered_toolkits
]

if "options" not in result.provenance:
result.provenance["options"] = {}

if target_bond_smarts is not None:
result.provenance["options"]["target_bond_smarts"] = target_bond_smarts

return result

def _default_provenance(self) -> Dict[str, Any]:
Expand All @@ -881,6 +934,7 @@ def _default_provenance(self) -> Dict[str, Any]:
"version": openff.fragmenter.__version__,
"options": self.dict(),
}

return provenance


Expand Down Expand Up @@ -930,7 +984,9 @@ class WBOFragmenter(Fragmenter):
"threshold.",
)

def _fragment(self, molecule: Molecule) -> FragmentationResult:
def _fragment(
self, molecule: Molecule, target_bond_smarts: Optional[List[str]]
) -> FragmentationResult:
"""Fragments a molecule in such a way that the WBO of the bond that a fragment
is being built around does not change beyond the specified threshold.
"""
Expand All @@ -955,7 +1011,8 @@ def _fragment(self, molecule: Molecule) -> FragmentationResult:
molecule, self.wbo_options.max_conformers, self.wbo_options.rms_threshold
)

wbo_rotor_bonds = self._get_rotor_wbo(molecule)
rotatable_bonds = self.find_rotatable_bonds(molecule, target_bond_smarts)
wbo_rotor_bonds = self._get_rotor_wbo(molecule, rotatable_bonds)

fragments = {
bond: self._build_fragment(
Expand All @@ -982,11 +1039,9 @@ def _fragment(self, molecule: Molecule) -> FragmentationResult:

@classmethod
def _get_rotor_wbo(
cls, molecule: Molecule, rotor_bonds: Optional[List[BondTuple]] = None
cls, molecule: Molecule, rotor_bonds: List[BondTuple]
) -> Dict[BondTuple, float]:
"""Cache the WBO of each bond in a specific set of rotor bonds. If no list is
provided, the WBO for each rotatable bond (as defined by
``_find_rotatable_bonds``) will be returned.
"""Cache the WBO of each bond in a specific set of rotor bonds..
Parameters
----------
Expand All @@ -1006,9 +1061,6 @@ def _get_rotor_wbo(
"WBO was not calculated for this molecule. Calculating WBO..."
)

if rotor_bonds is None:
rotor_bonds = cls._find_rotatable_bonds(molecule)

rotors_wbo = {}

for bond_indices in rotor_bonds:
Expand Down Expand Up @@ -1385,7 +1437,9 @@ class PfizerFragmenter(Fragmenter):

scheme: Literal["Pfizer"] = "Pfizer"

def _fragment(self, molecule: Molecule) -> FragmentationResult:
def _fragment(
self, molecule: Molecule, target_bond_smarts: Optional[List[str]]
) -> FragmentationResult:
"""Fragments a molecule according to Pfizer protocol."""

(
Expand All @@ -1395,7 +1449,7 @@ def _fragment(self, molecule: Molecule) -> FragmentationResult:
parent_rings,
) = self._prepare_molecule(molecule, self.functional_groups, False)

rotatable_bonds = self._find_rotatable_bonds(parent)
rotatable_bonds = self.find_rotatable_bonds(parent, target_bond_smarts)

fragments = {}

Expand Down
11 changes: 8 additions & 3 deletions openff/fragmenter/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
Empty init file in case you choose a package besides PyTest such as Nose which may look for such a file
"""
from contextlib import contextmanager


@contextmanager
def does_not_raise():
"""A helpful context manager to use inplace of a pytest raise statement
when no exception is expected."""
yield
51 changes: 44 additions & 7 deletions openff/fragmenter/tests/test_fragmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
PfizerFragmenter,
WBOFragmenter,
)
from openff.fragmenter.tests import does_not_raise
from openff.fragmenter.tests.utils import (
key_smarts_to_map_indices,
smarts_set_to_map_indices,
Expand Down Expand Up @@ -140,9 +141,9 @@ def test_find_functional_groups(potanib):
)


def test_find_rotatable_bonds(abemaciclib):
def test_find_rotatable_bonds_default(abemaciclib):

rotatable_bonds = Fragmenter._find_rotatable_bonds(abemaciclib)
rotatable_bonds = Fragmenter.find_rotatable_bonds(abemaciclib, None)
assert len(rotatable_bonds) == 7

expected_rotatable_bonds = smarts_set_to_map_indices(
Expand All @@ -161,6 +162,29 @@ def test_find_rotatable_bonds(abemaciclib):
assert {*rotatable_bonds} == expected_rotatable_bonds


@pytest.mark.parametrize(
"smarts, expected_raises",
[
(["[#6:1]-[#6:2]"], does_not_raise()),
(
["[#6]-[#6]"],
pytest.raises(ValueError, match="The `target_bond_smarts` pattern "),
),
],
)
def test_find_rotatable_bonds_custom(smarts, expected_raises):

ethane = Molecule.from_smiles("CC")
ethane.properties["atom_map"] = {i: i + 1 for i in range(ethane.n_atoms)}

with expected_raises:

rotatable_bonds = Fragmenter.find_rotatable_bonds(ethane, smarts)
assert len(rotatable_bonds) == 1

assert rotatable_bonds == [(1, 2)]


def test_atom_bond_set_to_mol(abemaciclib):

molecule = smiles_to_molecule(abemaciclib.to_smiles(mapped=False), True)
Expand Down Expand Up @@ -417,17 +441,24 @@ def test_prepare_molecule():
)
def test_fragmenter_provenance(toolkit_registry, expected_provenance):
class DummyFragmenter(Fragmenter):
def _fragment(self, molecule: Molecule) -> FragmentationResult:
def _fragment(
self, molecule: Molecule, target_bond_smarts: str
) -> FragmentationResult:

return FragmentationResult(
parent_smiles="[He:1]", fragments=[], provenance={}
)

result = DummyFragmenter().fragment(Molecule.from_smiles("[He]"), toolkit_registry)
result = DummyFragmenter().fragment(
Molecule.from_smiles("[He]"), ["[*:1]~[*:2]"], toolkit_registry
)

assert "toolkits" in result.provenance
assert [name for name, _ in result.provenance["toolkits"]] == expected_provenance

assert "options" in result.provenance
assert result.provenance["options"]["target_bond_smarts"] == ["[*:1]~[*:2]"]


def test_wbo_fragment():
"""Test build fragment"""
Expand Down Expand Up @@ -462,7 +493,9 @@ def test_get_rotor_wbo():
for match in molecule.chemical_environment_matches("[#6:1]-[#6:2]")
}

rotors_wbo = WBOFragmenter._get_rotor_wbo(molecule)
rotors_wbo = WBOFragmenter._get_rotor_wbo(
molecule, WBOFragmenter.find_rotatable_bonds(molecule, None)
)

assert len(rotors_wbo) == 1

Expand All @@ -475,7 +508,9 @@ def test_get_rotor_wbo():
def test_compare_wbo():

parent = assign_elf10_am1_bond_orders(smiles_to_molecule("CCCC", add_atom_map=True))
rotors_wbo = WBOFragmenter._get_rotor_wbo(parent)
rotors_wbo = WBOFragmenter._get_rotor_wbo(
parent, WBOFragmenter.find_rotatable_bonds(parent, None)
)

fragment = smiles_to_molecule("CCCC", add_atom_map=True)

Expand All @@ -496,7 +531,9 @@ def test_compare_wbo():
def test_build_fragment():

parent = assign_elf10_am1_bond_orders(smiles_to_molecule("CCCCCC", True))
rotors_wbo = WBOFragmenter._get_rotor_wbo(parent)
rotors_wbo = WBOFragmenter._get_rotor_wbo(
parent, WBOFragmenter.find_rotatable_bonds(parent, None)
)

fragments = {
bond: WBOFragmenter._build_fragment(
Expand Down

0 comments on commit dd00ab8

Please sign in to comment.