Skip to content

Commit

Permalink
add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
klausweinbauer committed Aug 21, 2024
1 parent cfc2a80 commit e28d185
Show file tree
Hide file tree
Showing 17 changed files with 560 additions and 25 deletions.
11 changes: 11 additions & 0 deletions doc/figure_scripts/caffeine_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import matplotlib.pyplot as plt
from fgutils import Parser
from fgutils.vis import plot_as_mol

# Parse the SMILES string of caffeine into a graph with the FGUtils parser.
parser = Parser()
mol = parser("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

# Draw the parsed graph as a molecule on a single axis and write the image
# that the pattern-syntax documentation embeds. The output path is relative,
# so the script presumably has to be run from the repository root.
fig, ax = plt.subplots(1, 1, dpi=200)
plot_as_mol(mol, ax)
plt.savefig("doc/figures/caffeine_example.png", bbox_inches="tight", transparent=True)

41 changes: 41 additions & 0 deletions doc/figure_scripts/diels_alder_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import matplotlib.pyplot as plt
from fgutils.proxy import ProxyGroup, ProxyGraph, ReactionProxy
from fgutils.proxy_collection.common import common_groups
from fgutils.vis import plot_reaction


# Collection groups: each one is a single node carrying several labels. When a
# sample is drawn from the proxy, the node is replaced by the pattern of one
# of the listed groups.
electron_donating_group = ProxyGroup(
    "electron_donating_group", pattern="{alkyl,aryl,amine}"
)
electron_withdrawing_group = ProxyGroup(
    "electron_withdrawing_group",
    pattern="{alkohol,ether,aldehyde,ester,nitrile}",
)

# Reactant fragments. ``<a,b>`` encodes a bond whose order changes from a to b
# during the reaction; the labeled node is substituted by one of the
# collection groups defined above.
diene_group = ProxyGroup(
    "diene",
    ProxyGraph("C<2,1>C<1,2>C<2,1>C{electron_donating_group}", anchor=[0, 3]),
)
dienophile_group = ProxyGroup(
    "dienophile",
    ProxyGraph("C<2,1>C{electron_withdrawing_group}", anchor=[0, 1]),
)

# The custom groups extend the predefined common groups, which provide the
# patterns for the labels referenced above.
groups = common_groups + [
    electron_donating_group,
    electron_withdrawing_group,
    diene_group,
    dienophile_group,
]

# Diels-Alder template: diene and dienophile are joined by two newly formed
# bonds (``<0,1>``) that close the ring.
proxy = ReactionProxy("{diene}1<0,1>{dienophile}<0,1>1", groups)

# Draw one freshly sampled reaction per panel of an r x c grid. ``ax.flat``
# walks the panels row by row.
r, c = 3, 2
fig, ax = plt.subplots(r, c, dpi=400)
for panel in ax.flat:
    g, h = next(proxy)
    panel.axis("off")
    plot_reaction(g, h, panel)

plt.tight_layout()
plt.savefig("doc/figures/diels_alder_example.png", bbox_inches="tight", transparent=True)
plt.show()
20 changes: 20 additions & 0 deletions doc/figure_scripts/labeled_node_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import matplotlib.pyplot as plt
from fgutils import Parser
from fgutils.proxy import MolProxy, ProxyGroup
from fgutils.vis import plot_graph

# Pattern with an abstract, labeled node: ``{propyl}`` is a placeholder that
# the proxy substitutes with the pattern of the group of the same name.
pattern = "CC(=O)O{propyl}"
propyl_group = ProxyGroup("propyl", pattern="CCC")
parser = Parser()
proxy = MolProxy(pattern, propyl_group, parser=parser)

# ``g`` is the raw parsed pattern (still containing the labeled node), while
# ``mol`` is the next sample from the proxy with the label replaced.
g = parser(pattern)
mol = next(proxy)

# Left panel: pattern graph with its labels shown; right panel: the sampled
# molecule. The figure is saved for the documentation and then displayed.
fig, ax = plt.subplots(1, 2, dpi=100, figsize=(12, 4))
plot_graph(g, ax[0], show_labels=True)
plot_graph(mol, ax[1])
plt.savefig(
"doc/figures/labeled_node_example.png", bbox_inches="tight", transparent=True
)
plt.show()
14 changes: 14 additions & 0 deletions doc/figure_scripts/simple_its_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import matplotlib.pyplot as plt
from fgutils import Parser
from fgutils.proxy import Proxy
from fgutils.vis import plot_its, plot_graph


# Pattern with explicit bond changes: ``<a,b>`` marks a bond whose order
# changes from a to b, and ``<0,1>`` marks a bond that is newly formed.
# NOTE(review): ``Proxy`` and ``plot_its`` are imported above but not used in
# this script -- confirm whether ``plot_its`` was meant to be used here.
pattern = "C1<2,1>C<1,2>C<2,1>C(C)<0,1>C<2,1>C(O)<0,1>1"
parser = Parser()
g = parser(pattern)

# Plot the parsed graph on a single axis, save it for the documentation and
# show it.
fig, ax = plt.subplots(1, 1)
plot_graph(g, ax)
plt.savefig("doc/figures/simple_its_example.png", bbox_inches="tight", transparent=True)
plt.show()
Binary file added doc/figures/caffeine_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/figures/diels_alder_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/figures/labeled_node_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/figures/simple_its_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Welcome to FGUtils's documentation!

.. toctree::
:maxdepth: 1
:caption: Contents:

pattern_syntax
references


Expand Down
121 changes: 121 additions & 0 deletions doc/pattern_syntax.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
==============
Pattern Syntax
==============

FGUtils has its own graph description language. The syntax is closely related
to the SMILES format for molecules and reactions. It is an extension
of SMILES that supports modeling ITS graphs and reaction patterns. To convert the
SMILES-like description into a graph object use the
:py:class:`~fgutils.parse.Parser` class. The Caffeine molecular graph can be
obtained as follows::

import matplotlib.pyplot as plt
from fgutils import Parser
from fgutils.vis import plot_as_mol

parser = Parser()
mol = parser("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

fig, ax = plt.subplots(1, 1)
plot_as_mol(mol, ax)
plt.show()

.. image:: figures/caffeine_example.png
:width: 300

Besides parsing common SMILES it is possible to generate molecule-like graphs
with more abstract nodes, i.e., arbitrary node labels. Arbitrary node labels
are surrounded by ``{}`` (e.g. ``{label}``). This abstract labeling can be used
to substitute nodes with specific patterns. This can be done by using a
:py:class:`~fgutils.proxy.Proxy`. Propyl acetate can be created by replacing
the labeled node with the propyl group::

import matplotlib.pyplot as plt
from fgutils import Parser
from fgutils.proxy import MolProxy, ProxyGroup
from fgutils.vis import plot_graph

pattern = "CC(=O)O{propyl}"
propyl_group = ProxyGroup("propyl", pattern="CCC")
parser = Parser()
proxy = MolProxy(pattern, propyl_group, parser=parser)

g = parser(pattern)
mol = next(proxy)

fig, ax = plt.subplots(1, 2, dpi=100, figsize=(12, 4))
plot_graph(g, ax[0], show_labels=True)
plot_graph(mol, ax[1])
plt.show()

.. image:: figures/labeled_node_example.png
:width: 600


.. note::

A node can have more than one label. This can be done by separating the
labels with a comma, e.g.: ``{label_1,label_2}``.

Another extension to the SMILES notation is the encoding of bond changes. This
feature is required to model reaction mechanisms as ITS graphs. Changing bonds
are surrounded by ``<>`` (e.g. ``<1, 2>`` for the formation of a double bond
from a single bond). The extended notation allows the automated generation of
reaction examples with complete atom-to-atom maps. The following code snippet
demonstrates the generation of a few Diels-Alder reactions. The ``diene`` and
``dienophile`` groups can of course be extended to increase the variety of the
samples::


import matplotlib.pyplot as plt
from fgutils.proxy import ProxyGroup, ProxyGraph, ReactionProxy
from fgutils.proxy_collection.common import common_groups
from fgutils.vis import plot_reaction


electron_donating_group = ProxyGroup(
"electron_donating_group", pattern="{alkyl,aryl,amine}"
)
electron_withdrawing_group = ProxyGroup(
"electron_withdrawing_group",
pattern="{alkohol,ether,aldehyde,ester,nitrile}",
)
diene_group = ProxyGroup(
"diene",
ProxyGraph("C<2,1>C<1,2>C<2,1>C{electron_donating_group}", anchor=[0, 3]),
)
dienophile_group = ProxyGroup(
"dienophile",
ProxyGraph("C<2,1>C{electron_withdrawing_group}", anchor=[0, 1]),
)
groups = common_groups + [
electron_donating_group,
electron_withdrawing_group,
diene_group,
dienophile_group,
]

proxy = ReactionProxy("{diene}1<0,1>{dienophile}<0,1>1", groups)

r, c = 3, 2
fig, ax = plt.subplots(r, c, dpi=400)
for ri in range(r):
for ci in range(c):
g, h = next(proxy)
ax[ri, ci].axis("off")
plot_reaction(g, h, ax[ri, ci])
plt.tight_layout()
plt.show()

.. image:: figures/diels_alder_example.png
:width: 1000

.. note::

The ``electron_donating_group`` and ``electron_withdrawing_group`` serve as
a collection of other groups to simplify the notation. They consist of a
single node with multiple labels. When iterating the next sample from the
proxy (``next(proxy)``) the labeled nodes get replaced by the pattern from
one of the groups. The group/label is chosen randomly with uniform
distribution.

9 changes: 8 additions & 1 deletion doc/references.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@
References
==========

Proxy
parse
=====

.. automodule:: fgutils.parse
:members:


proxy
=====

.. automodule:: fgutils.proxy
Expand Down
1 change: 1 addition & 0 deletions fgutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .permutation import PermutationMapper
from .query import FGQuery
from .proxy import ReactionProxy
from .parse import Parser
89 changes: 89 additions & 0 deletions fgutils/chem/its.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import collections
import networkx as nx
import rdkit.Chem as Chem

from fgutils.rdkit import graph_to_mol
from fgutils.const import SYMBOL_KEY, AAM_KEY, BOND_KEY


def _add_its_nodes(ITS, G, H, eta, symbol_key):
    """
    Add the nodes of the ITS graph.

    A node is only added if its ITS index has a partner node in both G and H.
    Every added node gets the attributes ``symbol`` (taken from the source
    node's ``symbol_key`` entry) and ``idx_map`` (the pair of node ids
    ``(node in G, node in H)``).

    :param ITS: Graph that receives the nodes (modified in place).
    :param G: Reactant graph.
    :param H: Product graph.
    :param eta: Sequence of four mappings: G node to ITS index, ITS index to
        G node, H node to ITS index and ITS index to H node. Missing keys are
        expected to resolve to ``None``.
    :param symbol_key: Node attribute key holding the atom symbol in G and H.
    """
    g_to_its, its_to_g, h_to_its, its_to_h = (eta[i] for i in range(4))

    # Reactant side: take every node of G that has a partner in H.
    for g_node, attrs in G.nodes(data=True):
        its_node = g_to_its[g_node]
        h_node = its_to_h[its_node]
        if its_node is None or h_node is None:
            continue
        ITS.add_node(its_node, symbol=attrs[symbol_key], idx_map=(g_node, h_node))

    # Product side: same rule, but never overwrite a node that the reactant
    # pass has already created.
    for h_node, attrs in H.nodes(data=True):
        its_node = h_to_its[h_node]
        g_node = its_to_g[its_node]
        if its_node is None or g_node is None or its_node in ITS.nodes:
            continue
        ITS.add_node(its_node, symbol=attrs[symbol_key], idx_map=(g_node, h_node))


def _add_its_edges(ITS, G, H, eta, bond_key):
    """
    Add the edges of the ITS graph.

    Every added edge gets a ``bond`` attribute ``(e_G, e_H)`` holding the bond
    label of the edge in G and in H. ``e_H`` is ``None`` if the bond exists
    only in G (bond is broken) and ``e_G`` is ``None`` if the bond exists only
    in H (bond is formed).

    :param ITS: Graph that receives the edges (modified in place).
    :param G: Reactant graph.
    :param H: Product graph.
    :param eta: Sequence of four mappings: G node to ITS index, ITS index to
        G node, H node to ITS index and ITS index to H node. Missing keys are
        expected to resolve to ``None``.
    :param bond_key: Edge attribute key holding the bond label in G and H.
    """
    eta_G, eta_G_inv, eta_H, eta_H_inv = eta[0], eta[1], eta[2], eta[3]

    # Bonds present in G (possibly also present in H).
    # An undirected graph reports each edge exactly once, in whichever
    # orientation it stores it, so edges must not be filtered by the order of
    # their endpoints. Duplicates are prevented by the ITS.has_edge() check.
    for n1, n2, d in G.edges(data=True):
        n_ITS1 = eta_G[n1]
        n_ITS2 = eta_G[n2]
        if n_ITS1 is None or n_ITS2 is None:
            # Bond to an atom without ITS index: it cannot be part of the ITS
            # graph (and None cannot be compared to 0 below).
            continue
        e_G = d[bond_key]
        n_H1 = eta_H_inv[n_ITS1]
        n_H2 = eta_H_inv[n_ITS2]
        e_H = None
        if H.has_edge(n_H1, n_H2):
            e_H = H[n_H1][n_H2][bond_key]
        # NOTE(review): index 0 is excluded here although get_its() accepts
        # atom-atom map numbers >= 0 -- confirm which of the two is intended.
        if not ITS.has_edge(n_ITS1, n_ITS2) and n_ITS1 > 0 and n_ITS2 > 0:
            ITS.add_edge(n_ITS1, n_ITS2, bond=(e_G, e_H))

    # Bonds present only in H, i.e. newly formed bonds.
    for n1, n2, d in H.edges(data=True):
        e_H = d[bond_key]
        n_ITS1 = eta_H[n1]
        n_ITS2 = eta_H[n2]
        n_G1 = eta_G_inv[n_ITS1]
        n_G2 = eta_G_inv[n_ITS2]
        if n_G1 is None or n_G2 is None:
            continue
        if not G.has_edge(n_G1, n_G2) and n_ITS1 > 0 and n_ITS2 > 0:
            ITS.add_edge(n_ITS1, n_ITS2, bond=(None, e_H))


def get_its(G: nx.Graph, H: nx.Graph) -> nx.Graph:
    """
    Build the ITS graph of the reaction G -> H. G and H must be molecular
    graphs with node labels 'aam' and 'symbol' and bond label 'bond'.

    Nodes of G and H are matched through their atom-atom map number, which
    also becomes the node index in the ITS graph. Nodes without a map number
    (or with a negative one) are not matched.

    :param G: Reactant molecular graph.
    :param H: Product molecular graph.

    :returns: Returns the ITS graph.

    :raises ValueError: If a node of G or H has no data.
    """
    # Layout: (G node -> ITS index, ITS index -> G node,
    #          H node -> ITS index, ITS index -> H node).
    # Unknown keys resolve to None instead of raising.
    eta = tuple(collections.defaultdict(lambda: None) for _ in range(4))

    for graph, to_its, from_its in ((G, eta[0], eta[1]), (H, eta[2], eta[3])):
        for node, data in graph.nodes(data=True):
            if data is None:
                raise ValueError("Graph node {} has no data.".format(node))
            if AAM_KEY in data and data[AAM_KEY] >= 0:
                to_its[node] = data[AAM_KEY]
                from_its[data[AAM_KEY]] = node

    ITS = nx.Graph()
    _add_its_nodes(ITS, G, H, eta, SYMBOL_KEY)
    _add_its_edges(ITS, G, H, eta, BOND_KEY)

    return ITS


5 changes: 5 additions & 0 deletions fgutils/const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
AAM_KEY = "aam"
SYMBOL_KEY = "symbol"
BOND_KEY = "bond"
IS_LABELED_KEY = "is_labeled"
LABELS_KEY = "labels"
Loading

0 comments on commit e28d185

Please sign in to comment.