TieuLongPhan
diff --git a/‎.coverage
0 Bytes b/‎.coverage
0 Bytes
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎synutility/SynGraph/Morphism/__init__.py renamed to ‎CHANGELOG.md b/‎synutility/SynGraph/Morphism/__init__.py renamed to ‎CHANGELOG.md
diff --git a/‎Data/Testcase/graph.pkl.gz
1.13 MB b/‎Data/Testcase/graph.pkl.gz
1.13 MB
diff --git a/‎Data/Testcase/mech.json.gz
Lines changed: 1 addition & 0 deletions b/‎Data/Testcase/mech.json.gz
Lines changed: 1 addition & 0 deletions
diff --git a/‎Data/smart.json.gz
Lines changed: 1 addition & 0 deletions b/‎Data/smart.json.gz
Lines changed: 1 addition & 0 deletions
diff --git a/‎Test/SynAAM/test_partial_expand.py
Lines changed: 2 additions & 2 deletions b/‎Test/SynAAM/test_partial_expand.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎Test/SynGraph/Cluster/__init__.py b/‎Test/SynGraph/Cluster/__init__.py
diff --git a/‎Test/SynGraph/Cluster/test_batch_cluster.py
Lines changed: 109 additions & 0 deletions b/‎Test/SynGraph/Cluster/test_batch_cluster.py
Lines changed: 109 additions & 0 deletions
diff --git a/‎Test/SynGraph/Cluster/test_graph_cluster.py
Lines changed: 136 additions & 0 deletions b/‎Test/SynGraph/Cluster/test_graph_cluster.py
Lines changed: 136 additions & 0 deletions
diff --git a/‎Test/SynGraph/GML/__init__.py b/‎Test/SynGraph/GML/__init__.py
@@ -8,3 +8,4 @@ test_mod.py
 test_format.py
 *dev_zone
 test_format.py
+ITS_graphs.pkl.gz
@@ -0,0 +1 @@
+[{"R-id": "Mech-1", "reaction": "[CH3:1][CH:2]=[O:3].[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "mechanisms": [{"mech_name": "Aldol reaction (base cat)", "steps": [{"description": "Base abstracts H from substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[*-:9]>>[CH-:4]([H:8])[CH:5]=[O:6].[*:9][H:7]", "step": 1, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n   ]\n   context [\n   ]\n   right [\n   ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Nucleophilic addition fro substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH-:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      edge [ source 1 target 2 label \"=\" ]\n      node [ id 2 label \"O\" ]\n      node [ id 3 label \"C-\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n   ]\n   right [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 1 target 3 label \"-\" ]\n      node [ id 2 label \"O-\" ]\n      node [ id 3 label \"C\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH3-:3]>>[CH2:1]([O-:2])[CH3:3]", "step_desc": "Nucleophilic addition fro substrate"}, {"description": "Neutralize substrate", "smart_string": "[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6].[*:9][H:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]", "step": 3, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n   ]\n   context [\n   ]\n   right [\n   ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize substrate"}, {"description": "Base abstracts H from substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]>>[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6].[*:9][H:8]", "step": 4, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n   ]\n   context [\n   ]\n   right [\n   ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from 
substrate"}, {"description": "Elimination Unimolecular Conjugate Base", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O-:3][H:7]", "step": 5, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 1 target 3 label \"-\" ]\n      node [ id 2 label \"O\" ]\n      node [ id 3 label \"C-\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n   ]\n   right [\n      edge [ source 1 target 3 label \"=\" ]\n      node [ id 2 label \"O-\" ]\n      node [ id 3 label \"C\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH2-:3]>>[CH2:1]=[CH2:3].[OH-:2]", "step_desc": "Elimination Unimolecular Conjugate Base"}, {"description": "Neutralize Hydroxide", "smart_string": "[O-:3][H:7].[*:9][H:8]>>[O:3]([H:7])([H:8]).[*-:9]", "step": 6, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n   ]\n   context [\n   ]\n   right [\n   ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize Hydroxide"}], "cat": "[*-]"}, {"mech_name": "Aldol reaction (neutral cat)", "steps": [{"description": "Tautomerization of substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", "step": 1, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 2 target 3 label \"=\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n      node [ id 2 label \"C\" ]\n      node [ id 3 label \"O\" ]\n   ]\n   right [\n      edge [ source 1 target 2 label \"=\" ]\n      edge [ source 2 target 3 label \"-\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      
edge [ source 1 target 2 label \"=\" ]\n      edge [ source 3 target 4 label \"=\" ]\n      edge [ source 4 target 5 label \"-\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n      node [ id 2 label \"O\" ]\n      node [ id 3 label \"C\" ]\n      node [ id 4 label \"C\" ]\n      node [ id 5 label \"O\" ]\n   ]\n   right [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 1 target 3 label \"-\" ]\n      edge [ source 3 target 4 label \"-\" ]\n      edge [ source 4 target 5 label \"=\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH2:3]=[CH:4][OH:5]>>[CH2:1]([OH:2])[CH2:3][CH:4]=[O:5]", "step_desc": "Nucleophilic addition from substrate"}, {"description": "Tautomerization of substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])", "step": 3, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 2 target 3 label \"=\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n      node [ id 2 label \"C\" ]\n      node [ id 3 label \"O\" ]\n   ]\n   right [\n      edge [ source 1 target 2 label \"=\" ]\n      edge [ source 2 target 3 label \"-\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "step": 4, "step_gml": "rule [\n   ruleID \"2\"\n   left [\n      edge [ source 1 target 2 label \"-\" ]\n      edge [ source 1 target 3 label \"-\" ]\n      edge [ source 3 target 4 label \"=\" ]\n      edge [ source 4 target 5 label \"-\" ]\n   ]\n   context [\n      node [ id 1 label \"C\" ]\n      node [ id 2 label \"O\" ]\n      node [ id 3 label \"C\" ]\n      node [ id 4 label \"C\" ]\n      node [ id 5 label \"O\" ]\n   ]\n   right [\n      edge [ source 1 
target 3 label \"=\" ]\n      edge [ source 3 target 4 label \"-\" ]\n      edge [ source 4 target 5 label \"=\" ]\n   ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH:3]=[CH:4][OH:5]>>[CH2:1]=[CH:3][CH:4]=[O:5].[OH2:2]", "step_desc": "Elimination"}], "cat": ""}, {"mech_name": "Aldol reaction (acid cat)", "steps": [{"description": "Tautomerization of substrate with acid cat", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[H+:9]>>[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]", "step": 1}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]", "step": 2}, {"description": "Tautomerization of substrate with acid cat", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]", "step": 3}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[H+:9].[O:3]([H:7])([H:8])", "step": 4}], "cat": "[H+]"}]}]
@@ -18,13 +18,13 @@ def test_expand(self):
             "[CH2:1]=[CH:2][CH3:3].[H:4][H:5]>>[CH2:1]([CH:2]([CH3:3])[H:5])[H:4]"
         )
         # Perform the expansion
-        output_rsmi = PartialExpand.expand(input_rsmi)
+        output_rsmi = PartialExpand.expand_aam_with_transform(input_rsmi)
         # Assert the result matches the expected output
         self.assertTrue(AAMValidator.smiles_check(output_rsmi, expected_rsmi, "ITS"))
 
     def test_expand_2(self):
         input_rsmi = "CC[CH2:3][Cl:1].[NH2:2][H:4]>>CC[CH2:3][NH2:2].[Cl:1][H:4]"
-        output_rsmi = PartialExpand.expand(input_rsmi)
+        output_rsmi = PartialExpand.expand_aam_with_transform(input_rsmi)
         expected_rsmi = (
             "[CH3:1][CH2:2][CH2:3][Cl:4].[NH2:5][H:6]"
             + ">>[CH3:1][CH2:2][CH2:3][NH2:5].[Cl:4][H:6]"
 
@@ -0,0 +1,109 @@
+import time
+import unittest
+from synutility.SynIO.data_type import load_from_pickle
+from synutility.SynGraph.Descriptor.graph_signature import GraphSignature
+from synutility.SynGraph.Cluster.batch_cluster import BatchCluster
+
+
+class TestBatchCluster(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.graphs = load_from_pickle("Data/Testcase/graph.pkl.gz")
+        cls.templates = None
+        for value in cls.graphs:
+            value["rc_sig"] = GraphSignature(value["RC"]).create_graph_signature()
+            value["its_sig"] = GraphSignature(value["ITS"]).create_graph_signature()
+
+    def test_initialization(self):
+        """Test initialization and verify if the attributes are set correctly."""
+        cluster = BatchCluster(["element", "charge"], ["*", 0], "bond_order")
+        self.assertEqual(cluster.nodeLabelNames, ["element", "charge"])
+        self.assertEqual(cluster.nodeLabelDefault, ["*", 0])
+        self.assertEqual(cluster.edgeAttribute, "bond_order")
+
+    def test_initialization_failure(self):
+        """Test initialization failure when lengths of node labels and defaults do not match."""
+        with self.assertRaises(ValueError):
+            BatchCluster(["element"], ["*", 0, 1], "bond_order")
+
+    def test_batch_dicts(self):
+        """Test the batching function to split data correctly."""
+        batch_cluster = BatchCluster(["element", "charge"], ["*", 0], "bond_order")
+        input_list = [{"id": i} for i in range(10)]
+        batches = batch_cluster.batch_dicts(input_list, 3)
+        self.assertEqual(len(batches), 4)
+        self.assertEqual(len(batches[0]), 3)
+        self.assertEqual(len(batches[-1]), 1)
+
+    def test_lib_check_functionality(self):
+        """Test the lib_check method using directly comparable results."""
+        cluster = BatchCluster()
+        batch_1 = self.graphs[:50]
+        batch_2 = self.graphs[50:]
+        _, templates = cluster.fit(batch_1, None, "RC", "rc_sig")
+        for entry in batch_2:
+            _, templates = cluster.lib_check(entry, templates, "RC", "rc_sig")
+        self.assertEqual(len(templates), 30)
+
+    def test_cluster_integration(self):
+        """Test the cluster method to ensure it processes data entries correctly."""
+        cluster = BatchCluster()
+        expected_template_count = 30
+        _, updated_templates = cluster.cluster(self.graphs, [], "RC", "rc_sig")
+
+        self.assertEqual(
+            len(updated_templates),
+            expected_template_count,
+            f"Failed: expected {expected_template_count} templates, got {len(updated_templates)}",
+        )
+
+    def test_fit(self):
+        cluster = BatchCluster()
+        batch_sizes = [None, 10]
+        expected_template_count = 30
+
+        for batch_size in batch_sizes:
+            start_time = time.time()
+            _, updated_templates = cluster.fit(
+                self.graphs, self.templates, "RC", "rc_sig", batch_size=batch_size
+            )
+            elapsed_time = time.time() - start_time
+
+            self.assertEqual(
+                len(updated_templates),
+                expected_template_count,
+                f"Failed for batch_size={batch_size}: expected "
+                + f"{expected_template_count} templates, got {len(updated_templates)}",
+            )
+            print(
+                f"Test for batch_size={batch_size} completed in {elapsed_time:.2f} seconds."
+            )
+
+    def test_fit_gml(self):
+        cluster = BatchCluster()
+        batch_sizes = [None, 10]
+        expected_template_count = (
+            30  # Assuming this is the expected number of templates after processing
+        )
+
+        for batch_size in batch_sizes:
+            start_time = time.time()
+            _, updated_templates = cluster.fit(
+                self.graphs, self.templates, "RC", "rc_sig", batch_size=batch_size
+            )
+            elapsed_time = time.time() - start_time
+
+            self.assertEqual(
+                len(updated_templates),
+                expected_template_count,
+                f"Failed for batch_size={batch_size}: expected"
+                + f" {expected_template_count} templates, got {len(updated_templates)}",
+            )
+            print(
+                f"Test for batch_size={batch_size} completed in {elapsed_time:.2f} seconds."
+            )
+
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,136 @@
+import time
+import unittest
+from synutility.SynIO.data_type import load_from_pickle
+from synutility.SynGraph.Cluster.graph_cluster import GraphCluster
+from synutility.SynGraph.Descriptor.graph_descriptors import GraphDescriptor
+
+
+class TestRCCluster(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # Load data once for all tests
+        cls.graphs = load_from_pickle("Data/Testcase/graph.pkl.gz")
+        for value in cls.graphs:
+            value = GraphDescriptor.get_descriptors(value)
+        cls.clusterer = GraphCluster()
+
+    def test_initialization(self):
+        """Test the initialization and configuration of the RCCluster."""
+        self.assertIsInstance(self.clusterer.nodeLabelNames, list)
+        self.assertEqual(self.clusterer.edgeAttribute, "order")
+        self.assertEqual(
+            len(self.clusterer.nodeLabelNames), len(self.clusterer.nodeLabelDefault)
+        )
+
+    def test_auto_cluster(self):
+        """Test the auto_cluster method functionality."""
+        rc = [value["RC"] for value in self.graphs]
+        cycles = [value["cycle"] for value in self.graphs]
+        signature = [value["signature_rc"] for value in self.graphs]
+        atom_count = [value["atom_count"] for value in self.graphs]
+        for att in [None, cycles, signature, atom_count]:
+            clusters, graph_to_cluster = self.clusterer.iterative_cluster(
+                rc,
+                att,
+                nodeMatch=self.clusterer.nodeMatch,
+                edgeMatch=self.clusterer.edgeMatch,
+            )
+            self.assertIsInstance(clusters, list)
+            self.assertIsInstance(graph_to_cluster, dict)
+            self.assertEqual(len(clusters), 30)
+
+    def test_auto_cluster_wrong_isomorphism(self):
+        rc = [value["RC"] for value in self.graphs]
+        cycles = [value["cycle"] for value in self.graphs]
+        signature = [value["signature_rc"] for value in self.graphs]
+        atom_count = [value["atom_count"] for value in self.graphs]
+
+        # cluster all
+        clusters, _ = self.clusterer.iterative_cluster(
+            rc, None, nodeMatch=None, edgeMatch=None
+        )
+        self.assertEqual(len(clusters), 8)  # wrong value
+
+        # cluster with cycle
+        clusters, _ = self.clusterer.iterative_cluster(
+            rc, cycles, nodeMatch=None, edgeMatch=None
+        )
+        self.assertEqual(len(clusters), 8)  # wrong value
+
+        # cluster with atom_count
+        clusters, _ = self.clusterer.iterative_cluster(
+            rc, atom_count, nodeMatch=None, edgeMatch=None
+        )
+        self.assertEqual(len(clusters), 27)  # wrong value but almost correct
+
+        # cluster with signature
+        clusters, _ = self.clusterer.iterative_cluster(
+            rc, signature, nodeMatch=None, edgeMatch=None
+        )
+        self.assertEqual(len(clusters), 30)  # correct by some magic. No proof for this
+
+    def test_fit(self):
+        """Test the fit method to ensure it correctly updates data entries with cluster indices."""
+
+        clustered_data = self.clusterer.fit(
+            self.graphs, rule_key="RC", attribute_key="atom_count"
+        )
+        max_class = 0
+        for item in clustered_data:
+            print(item["class"])
+            max_class = item["class"] if item["class"] >= max_class else max_class
+            # print(max_class)
+            self.assertIn("class", item)
+        self.assertEqual(max_class, 29)  # 30 classes start from 0 so max is 29
+
+    def test_fit_gml(self):
+        """Test the fit method to ensure it correctly updates data entries with cluster indices."""
+
+        clustered_data = self.clusterer.fit(
+            self.graphs, rule_key="rc", attribute_key="atom_count"
+        )
+        max_class = 0
+        for item in clustered_data:
+            print(item["class"])
+            max_class = item["class"] if item["class"] >= max_class else max_class
+            # print(max_class)
+            self.assertIn("class", item)
+        self.assertEqual(max_class, 29)  # 30 classes start from 0 so max is 29
+
+    def test_fit_time_compare(self):
+        attributes = {
+            "None": None,
+            "Cycles": "cycle",
+            "Signature": "signature_rc",
+            "Atom_count": "atom_count",
+        }
+
+        results = {}
+        for name, attr in attributes.items():
+            start_time = time.time()
+            clustered_data = self.clusterer.fit(
+                self.graphs, rule_key="RC", attribute_key=attr
+            )
+            elapsed_time = time.time() - start_time
+
+            # Optionally print out class information or verify correctness
+            max_class = max(item["class"] for item in clustered_data if "class" in item)
+
+            results[name] = elapsed_time
+
+            # Basic verification that 'class' is assigned and max class is as expected
+            self.assertTrue(all("class" in item for item in clustered_data))
+            self.assertEqual(
+                max_class, 29
+            )  # Ensure the maximum class index is as expected
+
+        # Compare results to check which attribute took the least/most time
+        min_time_attr = min(results, key=results.get)
+        max_time_attr = max(results, key=results.get)
+        self.assertIn(min_time_attr, ["Atom_count", "Signature"])
+        self.assertIn(max_time_attr, ["None", "Cycles"])
+
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+[{"R-id": "Mech-1", "reaction": "[CH3:1][CH:2]=[O:3].[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "mechanisms": [{"mech_name": "Aldol reaction (base cat)", "steps": [{"description": "Base abstracts H from substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[-:9]>>[CH-:4]([H:8])[CH:5]=[O:6].[:9][H:7]", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Nucleophilic addition fro substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH-:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH3-:3]>>[CH2:1]([O-:2])[CH3:3]", "step_desc": "Nucleophilic addition fro substrate"}, {"description": "Neutralize substrate", "smart_string": "[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6].[:9][H:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[-:9]", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize substrate"}, {"description": "Base abstracts H from substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[-:9]>>[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6].[:9][H:8]", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Elimination Unimolecular Conjugate Base", "smart_string": 
"[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O-:3][H:7]", "step": 5, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH2-:3]>>[CH2:1]=[CH2:3].[OH-:2]", "step_desc": "Elimination Unimolecular Conjugate Base"}, {"description": "Neutralize Hydroxide", "smart_string": "[O-:3][H:7].[:9][H:8]>>[O:3]([H:7])([H:8]).[-:9]", "step": 6, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize Hydroxide"}], "cat": "[*-]"}, {"mech_name": "Aldol reaction (neutral cat)", "steps": [{"description": "Tautomerization of substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C\" 
]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH2:3]=[CH:4][OH:5]>>[CH2:1]([OH:2])[CH2:3][CH:4]=[O:5]", "step_desc": "Nucleophilic addition from substrate"}, {"description": "Tautomerization of substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C\" ]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH:3]=[CH:4][OH:5]>>[CH2:1]=[CH:3][CH:4]=[O:5].[OH2:2]", "step_desc": "Elimination"}], "cat": ""}, {"mech_name": "Aldol reaction (acid cat)", "steps": [{"description": "Tautomerization of substrate with acid cat", "smart_string": 
"[CH:4]([H:7])([H:8])[CH:5]=[O:6].[H+:9]>>[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]", "step": 1}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]", "step": 2}, {"description": "Tautomerization of substrate with acid cat", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]", "step": 3}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[H+:9].[O:3]([H:7])([H:8])", "step": 4}], "cat": "[H+]"}]}]