From 008b3a334c7f3164083d0c54eb1ddf2c3efb6bed Mon Sep 17 00:00:00 2001 From: wiederm Date: Wed, 3 Apr 2019 13:02:47 -0400 Subject: [PATCH] changes to add tautomer functionality available. --- examples/tautomers/bmi.json | 62 ++ ...d-06cd-4e0c-9a60-cf19422b7595-bondfix.mol2 | 221 +++++ .../341660ed-06cd-4e0c-9a60-cf19422b7595.sdf | 218 +++++ examples/tautomers/bmi/input/README | 2 + examples/tautomers/bmi/input/bmi.pdb | 64 ++ .../bmi/input/bmi_hydrogen_fixed.mol2 | 220 +++++ .../bmi/input/bmi_hydrogen_fixed1.mol2 | 220 +++++ examples/tautomers/bmi/input/bmi_t1.mol2 | 111 +++ examples/tautomers/bmi/input/bmi_t1.sdf | 120 +++ examples/tautomers/bmi/input/bmi_t2.mol2 | 111 +++ examples/tautomers/bmi/input/bmi_t2.sdf | 120 +++ .../tautomers/bmi/input/bmi_tautomer_set.mol2 | 221 +++++ examples/tautomers/keto-enol.json | 62 ++ examples/tautomers/tautomer_utils.py | 348 +++++++ examples/tautomers/tautomers.ipynb | 145 +++ protons/app/__init__.py | 2 +- protons/app/driver.py | 809 +++++++++++++++- protons/app/ligands.py | 882 +++++++++++++++++- 18 files changed, 3925 insertions(+), 13 deletions(-) create mode 100644 examples/tautomers/bmi.json create mode 100644 examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595-bondfix.mol2 create mode 100644 examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595.sdf create mode 100644 examples/tautomers/bmi/input/README create mode 100644 examples/tautomers/bmi/input/bmi.pdb create mode 100644 examples/tautomers/bmi/input/bmi_hydrogen_fixed.mol2 create mode 100644 examples/tautomers/bmi/input/bmi_hydrogen_fixed1.mol2 create mode 100644 examples/tautomers/bmi/input/bmi_t1.mol2 create mode 100644 examples/tautomers/bmi/input/bmi_t1.sdf create mode 100644 examples/tautomers/bmi/input/bmi_t2.mol2 create mode 100644 examples/tautomers/bmi/input/bmi_t2.sdf create mode 100644 examples/tautomers/bmi/input/bmi_tautomer_set.mol2 create mode 100644 examples/tautomers/keto-enol.json create mode 100644 examples/tautomers/tautomer_utils.py create mode 100644 examples/tautomers/tautomers.ipynb diff --git a/examples/tautomers/bmi.json b/examples/tautomers/bmi.json new file mode 100644 index 0000000..5633216 --- /dev/null +++ b/examples/tautomers/bmi.json @@ -0,0 +1,62 @@ +{ + + "_comment": "This script handles the calibration run for BMI (ligand of 2oj9).", + "name" : "bmi", + "input": { + "_comment": "Simulation requires an mmCIF file and a ffxml residue. Please specify the input directory under dir.", + "dir": "/home/mwieder/Work/Projects/protons-dev/protons/examples/tautomers/bmi/input/", + "structure": "bmi.cif" + }, + "output": { + "dir": "/home/mwieder/Work/Projects/protons-dev/protons/examples/tautomers/bmi/output/" + + }, + "forcefield": { + "_comment1": "Standard, included xml files. Don't forget -obc2 files if using implicit.", + "default": ["amber10-constph.xml", "gaff.xml", "tip3p.xml"], + "_comment2": "Custom generated xml file (needs to be in input dir.", + "user": ["bmi.xml"] + }, + "format_vars": { + "_comment1": "These variables are filled into file names for input and output when you use {} style syntax.", + "name": "bmi" + }, + "system": { + "_comment1": "Systemwide settings, such as temperature, and long range method", + "_comment2": "If PME left out, nocutoff is used.", + "PME": { + "_comment": "Ewald + periodic system settings", + "ewald_error_tolerance": 1.0e-5, + "switching_distance_nm": 0.85, + "nonbonded_cutoff_nm": 1.0, + "barostat_interval": 25, + "pressure_atm": 1.0 + }, + "temperature_k": 300.0, + "salt_concentration_molar": 0.150 + }, + "integrator": { + "timestep_fs": 2.0, + "constraint_tolerance": 1.0e-7, + "collision_rate_per_ps": 1.0 + }, + "preprocessing": { + "minimization_tolerance_kjmol": 1.0e-5, + "minimization_max_iterations": 300, + "num_thermalization_steps": 100 + }, + "SAMS": { + "beta": 0.6, + "flatness_criterion": 0.1, + "sites": "multi", + "update_rule": "binary", + "min_burn": 200, + "min_slow": 200, + "min_fast": 200 + }, + "simulation" : { + "dcd_filename" : "{name}.dcd" , + "netCDF4" : "{name}.nc" + } + +} diff --git a/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595-bondfix.mol2 b/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595-bondfix.mol2 new file mode 100644 index 0000000..e3f7a0f --- /dev/null +++ b/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595-bondfix.mol2 @@ -0,0 +1,221 @@ +@MOLECULE +Structure5 + 49 53 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C12 -3.4790 2.6799 -6.3069 C.ar 1 UNL1 0.0293 + 17 C13 -1.1754 3.0676 -5.3133 C.ar 1 UNL1 0.0498 + 18 C14 -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0103 + 19 N4 -2.3988 3.4661 -6.0034 N.ar 1 UNL1 -0.3058 + 20 C15 -2.6948 4.7147 -6.4799 C.ar 1 UNL1 0.1004 + 21 N5 -3.8752 4.7586 -7.0542 N.ar 1 UNL1 -0.2437 + 22 C16 -4.3619 3.5300 -6.9560 C.ar 1 UNL1 0.0458 + 23 C17 -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0917 + 24 N6 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3375 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N7 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2274 + 27 C19 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0695 + 28 C20 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0231 + 29 C21 0.0605 3.3904 -5.8723 C.ar 1 UNL1 -0.0328 + 30 C22 2.5855 3.3886 -5.8692 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H -3.5216 1.6294 -6.0449 H 1 UNL1 0.0829 + 42 H -2.2176 2.1216 -3.6747 H 1 UNL1 0.0660 + 43 H -1.9881 5.5298 -6.3621 H 1 UNL1 0.1032 + 44 H -5.3359 3.2588 -7.3473 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H 0.0865 3.9316 -6.8098 H 1 UNL1 0.0641 + 47 H 2.4205 3.9273 -6.8023 H 1 UNL1 0.0278 + 48 H 3.1484 4.0187 -5.1806 H 1 UNL1 0.0278 + 49 H 3.1484 2.4772 -6.0706 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 1 + 3 1 15 1 + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 1 + 8 4 32 1 + 9 5 6 1 + 10 5 15 2 + 11 6 7 2 + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 2 + 18 9 11 1 + 19 9 37 1 + 20 10 12 1 + 21 10 38 1 + 22 11 14 2 + 23 11 39 1 + 24 12 13 2 + 25 13 14 1 + 26 14 40 1 + 27 15 25 1 + 28 16 19 1 + 29 16 22 2 + 30 16 41 1 + 31 17 18 1 + 32 17 19 1 + 33 17 29 2 + 34 18 23 1 + 35 18 42 1 + 36 19 20 1 + 37 20 21 2 + 38 20 43 1 + 39 21 22 1 + 40 22 44 1 + 41 23 26 2 + 42 23 27 1 + 43 24 25 2 + 44 24 27 1 + 45 24 45 1 + 46 25 26 1 + 47 27 28 2 + 48 28 29 1 + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 + +@MOLECULE +Structure6 + 49 53 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C12 0.8141 5.2498 -7.4277 C.ar 1 UNL1 0.0293 + 17 C13 0.0605 3.3904 -5.8723 C.ar 1 UNL1 0.0498 + 18 C14 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0123 + 19 N4 0.0956 4.1202 -7.1364 N.ar 1 UNL1 -0.3058 + 20 C15 -0.5921 3.8054 -8.2773 C.ar 1 UNL1 0.1004 + 21 N5 -0.3526 4.6548 -9.2502 N.ar 1 UNL1 -0.2437 + 22 C16 0.4995 5.5366 -8.7477 C.ar 1 UNL1 0.0458 + 23 C17 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0687 + 24 N6 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2272 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N7 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3376 + 27 C19 -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0925 + 28 C20 -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0211 + 29 C21 -1.1754 3.0676 -5.3133 C.ar 1 UNL1 -0.0328 + 30 C22 -2.5901 2.0249 -3.5073 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H 1.4583 5.7290 -6.7003 H 1 UNL1 0.0829 + 42 H 2.2166 3.2898 -5.6981 H 1 UNL1 0.0659 + 43 H -1.2438 2.9382 -8.3047 H 1 UNL1 0.1032 + 44 H 0.8829 6.3685 -9.3275 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H -2.0826 3.3631 -5.8250 H 1 UNL1 0.0641 + 47 H -3.3861 2.3972 -4.1521 H 1 UNL1 0.0278 + 48 H -2.6758 0.9424 -3.4123 H 1 UNL1 0.0278 + 49 H -2.6758 2.4839 -2.5223 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 1 + 3 1 15 1 + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 1 + 8 4 32 1 + 9 5 6 1 + 10 5 15 2 + 11 6 7 2 + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 2 + 18 9 11 1 + 19 9 37 1 + 20 10 12 1 + 21 10 38 1 + 22 11 14 2 + 23 11 39 1 + 24 12 13 2 + 25 13 14 1 + 26 14 40 1 + 27 15 25 1 + 28 16 19 1 + 29 16 22 2 + 30 16 41 1 + 31 17 18 1 + 32 17 19 1 + 33 17 29 2 + 34 18 23 1 + 35 18 42 1 + 36 19 20 1 + 37 20 21 2 + 38 20 43 1 + 39 21 22 1 + 40 22 44 1 + 41 23 26 2 + 42 23 27 1 + 43 24 25 2 + 44 24 27 1 + 45 25 26 1 + 46 26 45 1 + 47 27 28 2 + 48 28 29 1 + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595.sdf b/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595.sdf new file mode 100644 index 0000000..fd21910 --- /dev/null +++ b/examples/tautomers/bmi/input/341660ed-06cd-4e0c-9a60-cf19422b7595.sdf @@ -0,0 +1,218 @@ +Structure5 + -OEChem-04031912343D + + 49 53 0 1 0 0 0 0 0999 V2000 + 3.6090 0.6174 -1.0694 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2430 1.1446 -1.9825 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2256 0.0104 -0.0180 N 0 3 0 0 0 3 0 0 0 0 0 0 + 1.4600 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1963 -0.6286 1.0888 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5428 -0.5862 1.0154 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7300 -1.2644 0.0003 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3124 -2.8932 -3.3965 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0587 -2.5026 -2.0819 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4876 -2.4654 -4.0105 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9800 -1.6978 -1.4138 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1281 -1.2743 -2.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3628 -1.6620 -3.2808 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1491 0.5849 -1.0130 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4790 2.6799 -6.3069 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1754 3.0676 -5.3133 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2589 2.3705 -4.1059 C 0 5 3 0 0 0 0 0 0 0 0 0 + -2.3988 3.4661 -6.0034 N 0 3 0 0 0 3 0 0 0 0 0 0 + -2.6948 4.7147 -6.4799 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8752 4.7586 -7.0542 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.3619 3.5300 -6.9560 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0730 1.9990 -3.4623 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1001 1.8299 -3.1695 N 0 3 0 0 0 0 0 0 0 0 0 0 + 1.4071 1.2367 -2.1420 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1058 1.3180 -2.2829 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1720 2.3377 -4.0490 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2633 3.0344 -5.2558 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0605 3.3904 -5.8723 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5855 3.3886 -5.8692 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5050 0.8747 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2340 0.0309 -0.0536 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6865 -1.1065 1.9164 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2081 -1.0140 1.7562 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6824 -1.1340 0.5141 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1409 -2.0239 0.5143 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3912 -3.5176 -3.9304 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8462 -2.8212 -1.5828 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7169 -2.7490 -5.0286 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2841 -1.3150 -3.7334 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5216 1.6294 -6.0449 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2176 2.1216 -3.6747 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9881 5.5298 -6.3621 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.3359 3.2588 -7.3473 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1139 1.8582 -3.2185 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0865 3.9316 -6.8098 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4205 3.9273 -6.8023 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1484 4.0187 -5.1806 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1484 2.4772 -6.0706 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 0 0 + 1 4 1 0 0 0 0 + 1 15 1 0 0 0 0 + 3 5 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 31 1 0 0 0 0 + 4 7 1 0 0 0 0 + 4 32 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 15 2 0 0 0 0 + 6 7 2 0 0 0 0 + 6 33 1 0 0 0 0 + 7 34 1 0 0 0 0 + 8 12 1 0 0 0 0 + 8 35 1 0 0 0 0 + 8 36 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 11 1 0 0 0 0 + 9 37 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 38 1 0 0 0 0 + 11 14 2 0 0 0 0 + 11 39 1 0 0 0 0 + 12 13 2 0 0 0 0 + 13 14 1 0 0 0 0 + 14 40 1 0 0 0 0 + 15 25 1 0 0 0 0 + 16 19 1 0 0 0 0 + 16 22 2 0 0 0 0 + 16 41 1 0 0 0 0 + 17 18 1 0 0 0 0 + 17 19 1 0 0 0 0 + 17 29 2 0 0 0 0 + 18 23 1 0 0 0 0 + 18 42 1 0 0 0 0 + 19 20 1 0 0 0 0 + 20 21 2 0 0 0 0 + 20 43 1 0 0 0 0 + 21 22 1 0 0 0 0 + 22 44 1 0 0 0 0 + 23 26 2 0 0 0 0 + 23 27 1 0 0 0 0 + 24 25 2 0 0 0 0 + 24 27 1 0 0 0 0 + 24 45 1 0 0 0 0 + 25 26 1 0 0 0 0 + 27 28 2 0 0 0 0 + 28 29 1 0 0 0 0 + 28 30 1 0 0 0 0 + 29 46 1 0 0 0 0 + 30 47 1 0 0 0 0 + 30 48 1 0 0 0 0 + 30 49 1 0 0 0 0 +M CHG 4 4 1 18 -1 19 1 24 1 +M END +$$$$ +Structure6 + -OEChem-04031912343D + + 49 53 0 1 0 0 0 0 0999 V2000 + 3.6090 0.6174 -1.0694 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2430 1.1446 -1.9825 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2256 0.0104 -0.0180 N 0 3 0 0 0 3 0 0 0 0 0 0 + 1.4600 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1963 -0.6286 1.0888 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5428 -0.5862 1.0154 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7300 -1.2644 0.0003 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3124 -2.8932 -3.3965 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0587 -2.5026 -2.0819 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4876 -2.4654 -4.0105 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9800 -1.6978 -1.4138 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1281 -1.2743 -2.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3628 -1.6620 -3.2808 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1491 0.5849 -1.0130 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8141 5.2498 -7.4277 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0605 3.3904 -5.8723 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2633 3.0344 -5.2558 C 0 5 3 0 0 0 0 0 0 0 0 0 + 0.0956 4.1202 -7.1364 N 0 3 0 0 0 3 0 0 0 0 0 0 + -0.5921 3.8054 -8.2773 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3526 4.6548 -9.2502 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4995 5.5366 -8.7477 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1720 2.3377 -4.0490 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1058 1.3180 -2.2829 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4071 1.2367 -2.1420 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1001 1.8299 -3.1695 N 0 3 0 0 0 0 0 0 0 0 0 0 + -0.0730 1.9990 -3.4623 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2589 2.3705 -4.1059 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1754 3.0676 -5.3133 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5901 2.0249 -3.5073 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5050 0.8747 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2340 0.0309 -0.0536 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6865 -1.1065 1.9164 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2081 -1.0140 1.7562 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6824 -1.1340 0.5141 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1409 -2.0239 0.5143 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3912 -3.5176 -3.9304 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8462 -2.8212 -1.5828 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7169 -2.7490 -5.0286 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2841 -1.3150 -3.7334 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4583 5.7290 -6.7003 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2166 3.2898 -5.6981 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2438 2.9382 -8.3047 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8829 6.3685 -9.3275 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1139 1.8582 -3.2185 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0826 3.3631 -5.8250 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3861 2.3972 -4.1521 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6758 0.9424 -3.4123 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6758 2.4839 -2.5223 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 0 0 + 1 4 1 0 0 0 0 + 1 15 1 0 0 0 0 + 3 5 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 31 1 0 0 0 0 + 4 7 1 0 0 0 0 + 4 32 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 15 2 0 0 0 0 + 6 7 2 0 0 0 0 + 6 33 1 0 0 0 0 + 7 34 1 0 0 0 0 + 8 12 1 0 0 0 0 + 8 35 1 0 0 0 0 + 8 36 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 11 1 0 0 0 0 + 9 37 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 38 1 0 0 0 0 + 11 14 2 0 0 0 0 + 11 39 1 0 0 0 0 + 12 13 2 0 0 0 0 + 13 14 1 0 0 0 0 + 14 40 1 0 0 0 0 + 15 25 1 0 0 0 0 + 16 19 1 0 0 0 0 + 16 22 2 0 0 0 0 + 16 41 1 0 0 0 0 + 17 18 1 0 0 0 0 + 17 19 1 0 0 0 0 + 17 29 2 0 0 0 0 + 18 23 1 0 0 0 0 + 18 42 1 0 0 0 0 + 19 20 1 0 0 0 0 + 20 21 2 0 0 0 0 + 20 43 1 0 0 0 0 + 21 22 1 0 0 0 0 + 22 44 1 0 0 0 0 + 23 26 2 0 0 0 0 + 23 27 1 0 0 0 0 + 24 25 2 0 0 0 0 + 24 27 1 0 0 0 0 + 25 26 1 0 0 0 0 + 26 45 1 0 0 0 0 + 27 28 2 0 0 0 0 + 28 29 1 0 0 0 0 + 28 30 1 0 0 0 0 + 29 46 1 0 0 0 0 + 30 47 1 0 0 0 0 + 30 48 1 0 0 0 0 + 30 49 1 0 0 0 0 +M CHG 4 4 1 18 -1 19 1 26 1 +M END +$$$$ diff --git a/examples/tautomers/bmi/input/README b/examples/tautomers/bmi/input/README new file mode 100644 index 0000000..740fb9e --- /dev/null +++ b/examples/tautomers/bmi/input/README @@ -0,0 +1,2 @@ +t1 is wrong tautomer +t2 is dominant in protein binding site diff --git a/examples/tautomers/bmi/input/bmi.pdb b/examples/tautomers/bmi/input/bmi.pdb new file mode 100644 index 0000000..feeecbf --- /dev/null +++ b/examples/tautomers/bmi/input/bmi.pdb @@ -0,0 +1,64 @@ +COMPND Structure5 +AUTHOR GENERATED BY OPEN BABEL 2.4.1 +HETATM 1 C1 UNL 1 3.609 0.617 -1.069 1.00 0.00 C +HETATM 2 O1 UNL 1 4.243 1.145 -1.982 1.00 0.00 O +HETATM 3 N1 UNL 1 0.000 0.000 0.000 1.00 0.00 N +HETATM 4 N2 UNL 1 4.226 0.010 -0.018 1.00 0.00 N +HETATM 5 C2 UNL 1 1.460 0.000 0.000 1.00 0.00 C +HETATM 6 C3 UNL 1 2.196 -0.629 1.089 1.00 0.00 C +HETATM 7 C4 UNL 1 3.543 -0.586 1.015 1.00 0.00 C +HETATM 8 C5 UNL 1 -0.730 -1.264 0.000 1.00 0.00 C +HETATM 9 C6 UNL 1 -0.312 -2.893 -3.397 1.00 0.00 C +HETATM 10 C7 UNL 1 -0.059 -2.503 -2.082 1.00 0.00 C +HETATM 11 C8 UNL 1 -1.488 -2.465 -4.011 1.00 0.00 C +HETATM 12 C9 UNL 1 -0.980 -1.698 -1.414 1.00 0.00 C +HETATM 13 N3 UNL 1 -2.128 -1.274 -2.000 1.00 0.00 N +HETATM 14 C10 UNL 1 -2.363 -1.662 -3.281 1.00 0.00 C +HETATM 15 C11 UNL 1 2.149 0.585 -1.013 1.00 0.00 C +HETATM 16 C12 UNL 1 -3.479 2.680 -6.307 1.00 0.00 C +HETATM 17 C13 UNL 1 -1.175 3.068 -5.313 1.00 0.00 C +HETATM 18 C14 UNL 1 -1.259 2.370 -4.106 1.00 0.00 C +HETATM 19 N4 UNL 1 -2.399 3.466 -6.003 1.00 0.00 N +HETATM 20 C15 UNL 1 -2.695 4.715 -6.480 1.00 0.00 C +HETATM 21 N5 UNL 1 -3.875 4.759 -7.054 1.00 0.00 N +HETATM 22 C16 UNL 1 -4.362 3.530 -6.956 1.00 0.00 C +HETATM 23 C17 UNL 1 -0.073 1.999 -3.462 1.00 0.00 C +HETATM 24 N6 UNL 1 2.100 1.830 -3.170 1.00 0.00 N +HETATM 25 C18 UNL 1 1.407 1.237 -2.142 1.00 0.00 C +HETATM 26 N7 UNL 1 0.106 1.318 -2.283 1.00 0.00 N +HETATM 27 C19 UNL 1 1.172 2.338 -4.049 1.00 0.00 C +HETATM 28 C20 UNL 1 1.263 3.034 -5.256 1.00 0.00 C +HETATM 29 C21 UNL 1 0.060 3.390 -5.872 1.00 0.00 C +HETATM 30 C22 UNL 1 2.586 3.389 -5.869 1.00 0.00 C +CONECT 1 2 4 15 +CONECT 2 1 +CONECT 3 5 8 +CONECT 4 1 7 +CONECT 5 3 6 15 +CONECT 6 5 7 +CONECT 7 4 6 +CONECT 8 3 12 +CONECT 9 10 11 +CONECT 10 9 12 +CONECT 11 9 14 +CONECT 12 8 10 13 +CONECT 13 12 14 +CONECT 14 11 13 +CONECT 15 1 5 25 +CONECT 16 19 22 +CONECT 17 18 19 29 +CONECT 18 17 23 +CONECT 19 16 17 20 +CONECT 20 19 21 +CONECT 21 20 22 +CONECT 22 16 21 +CONECT 23 18 26 27 +CONECT 24 25 27 +CONECT 25 15 24 26 +CONECT 26 23 25 +CONECT 27 23 24 28 +CONECT 28 27 29 30 +CONECT 29 17 28 +CONECT 30 28 +MASTER 0 0 0 0 0 0 0 0 30 0 30 0 +END diff --git a/examples/tautomers/bmi/input/bmi_hydrogen_fixed.mol2 b/examples/tautomers/bmi/input/bmi_hydrogen_fixed.mol2 new file mode 100644 index 0000000..b49218f --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_hydrogen_fixed.mol2 @@ -0,0 +1,220 @@ +@MOLECULE +Structure5 + 49 53 0 0 0 +SMALL +USER_CHARGES + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 <0> 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 <0> -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 <0> -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 <0> -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 <0> 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 <0> -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 <0> 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 <0> 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 <0> -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 <0> -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 <0> -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 <0> 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 <0> -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 <0> 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 <0> 0.1022 + 16 C12 -3.4790 2.6799 -6.3069 C.ar 1 <0> 0.0293 + 17 C13 -1.1754 3.0676 -5.3133 C.ar 1 <0> 0.0498 + 18 C14 -1.2589 2.3705 -4.1059 C.ar 1 <0> -0.0103 + 19 N4 -2.3988 3.4661 -6.0034 N.ar 1 <0> -0.3058 + 20 C15 -2.6948 4.7147 -6.4799 C.ar 1 <0> 0.1004 + 21 N5 -3.8752 4.7586 -7.0542 N.ar 1 <0> -0.2437 + 22 C16 -4.3619 3.5300 -6.9560 C.ar 1 <0> 0.0458 + 23 C17 -0.0730 1.9990 -3.4623 C.ar 1 <0> 0.0917 + 24 N6 2.1001 1.8299 -3.1695 N.ar 1 <0> -0.3375 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 <0> 0.1463 + 26 N7 0.1058 1.3180 -2.2829 N.ar 1 <0> -0.2274 + 27 C19 1.1720 2.3377 -4.0490 C.ar 1 <0> 0.0695 + 28 C20 1.2633 3.0344 -5.2558 C.ar 1 <0> -0.0231 + 29 C21 0.0605 3.3904 -5.8723 C.ar 1 <0> -0.0328 + 30 C22 2.5855 3.3886 -5.8692 C.3 1 <0> -0.0376 + 31 H1 -0.5050 0.8747 0.0000 H 1 <0> 0.1465 + 32 H2 5.2340 0.0309 -0.0536 H 1 <0> 0.1683 + 33 H3 1.6865 -1.1065 1.9164 H 1 <0> 0.0651 + 34 H4 4.2081 -1.0140 1.7562 H 1 <0> 0.0808 + 35 H5 -1.6824 -1.1340 0.5141 H 1 <0> 0.0526 + 36 H6 -0.1409 -2.0239 0.5143 H 1 <0> 0.0526 + 37 H7 0.3912 -3.5176 -3.9304 H 1 <0> 0.0619 + 38 H8 0.8462 -2.8212 -1.5828 H 1 <0> 0.0636 + 39 H9 -1.7169 -2.7490 -5.0286 H 1 <0> 0.0633 + 40 H10 -3.2841 -1.3150 -3.7334 H 1 <0> 0.0829 + 41 H11 -3.5216 1.6294 -6.0449 H 1 <0> 0.0829 + 42 H12 -2.2176 2.1216 -3.6747 H 1 <0> 0.0660 + 43 H13 -1.9881 5.5298 -6.3621 H 1 <0> 0.1032 + 44 H14 -5.3359 3.2588 -7.3473 H 1 <0> 0.0845 + 45 H15 3.1139 1.8582 -3.2185 H 1 <0> 0.1674 + 46 H16 0.0865 3.9316 -6.8098 H 1 <0> 0.0641 + 47 H17 2.4205 3.9273 -6.8023 H 1 <0> 0.0278 + 48 H18 3.1484 4.0187 -5.1806 H 1 <0> 0.0278 + 49 H19 3.1484 2.4772 -6.0706 H 1 <0> 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 24 45 1 + 46 25 26 ar + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 +@MOLECULE +Structure6 + 49 53 0 0 0 +SMALL +USER_CHARGES + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 <0> 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 <0> -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 <0> -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 <0> -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 <0> 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 <0> -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 <0> 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 <0> 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 <0> -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 <0> -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 <0> -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 <0> 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 <0> -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 <0> 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 <0> 0.1022 + 16 C12 0.8141 5.2498 -7.4277 C.ar 1 <0> 0.0293 + 17 C13 0.0605 3.3904 -5.8723 C.ar 1 <0> 0.0498 + 18 C14 1.2633 3.0344 -5.2558 C.ar 1 <0> -0.0123 + 19 N4 0.0956 4.1202 -7.1364 N.ar 1 <0> -0.3058 + 20 C15 -0.5921 3.8054 -8.2773 C.ar 1 <0> 0.1004 + 21 N5 -0.3526 4.6548 -9.2502 N.ar 1 <0> -0.2437 + 22 C16 0.4995 5.5366 -8.7477 C.ar 1 <0> 0.0458 + 23 C17 1.1720 2.3377 -4.0490 C.ar 1 <0> 0.0687 + 24 N6 0.1058 1.3180 -2.2829 N.ar 1 <0> -0.2272 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 <0> 0.1463 + 26 N7 2.1001 1.8299 -3.1695 N.ar 1 <0> -0.3376 + 27 C19 -0.0730 1.9990 -3.4623 C.ar 1 <0> 0.0925 + 28 C20 -1.2589 2.3705 -4.1059 C.ar 1 <0> -0.0211 + 29 C21 -1.1754 3.0676 -5.3133 C.ar 1 <0> -0.0328 + 30 C22 -2.5901 2.0249 -3.5073 C.3 1 <0> -0.0376 + 31 H1 -0.5050 0.8747 0.0000 H 1 <0> 0.1465 + 32 H2 5.2340 0.0309 -0.0536 H 1 <0> 0.1683 + 33 H3 1.6865 -1.1065 1.9164 H 1 <0> 0.0651 + 34 H4 4.2081 -1.0140 1.7562 H 1 <0> 0.0808 + 35 H5 -1.6824 -1.1340 0.5141 H 1 <0> 0.0526 + 36 H6 -0.1409 -2.0239 0.5143 H 1 <0> 0.0526 + 37 H7 0.3912 -3.5176 -3.9304 H 1 <0> 0.0619 + 38 H8 0.8462 -2.8212 -1.5828 H 1 <0> 0.0636 + 39 H9 -1.7169 -2.7490 -5.0286 H 1 <0> 0.0633 + 40 H10 -3.2841 -1.3150 -3.7334 H 1 <0> 0.0829 + 41 H11 1.4583 5.7290 -6.7003 H 1 <0> 0.0829 + 42 H12 2.2166 3.2898 -5.6981 H 1 <0> 0.0659 + 43 H13 -1.2438 2.9382 -8.3047 H 1 <0> 0.1032 + 44 H14 0.8829 6.3685 -9.3275 H 1 <0> 0.0845 + 45 H20 3.1139 1.8582 -3.2185 H 1 <0> 0.1674 + 46 H16 -2.0826 3.3631 -5.8250 H 1 <0> 0.0641 + 47 H17 -3.3861 2.3972 -4.1521 H 1 <0> 0.0278 + 48 H18 -2.6758 0.9424 -3.4123 H 1 <0> 0.0278 + 49 H19 -2.6758 2.4839 -2.5223 H 1 <0> 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 25 26 ar + 46 26 45 1 + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/bmi/input/bmi_hydrogen_fixed1.mol2 b/examples/tautomers/bmi/input/bmi_hydrogen_fixed1.mol2 new file mode 100644 index 0000000..8dccc24 --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_hydrogen_fixed1.mol2 @@ -0,0 +1,220 @@ +@MOLECULE +Structure5 + 49 53 0 0 0 +SMALL +USER_CHARGES + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 <0> 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 <0> -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 <0> -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 <0> -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 <0> 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 <0> -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 <0> 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 <0> 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 <0> -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 <0> -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 <0> -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 <0> 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 <0> -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 <0> 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 <0> 0.1022 + 16 C12 -3.4790 2.6799 -6.3069 C.ar 1 <0> 0.0293 + 17 C13 -1.1754 3.0676 -5.3133 C.ar 1 <0> 0.0498 + 18 C14 -1.2589 2.3705 -4.1059 C.ar 1 <0> -0.0103 + 19 N4 -2.3988 3.4661 -6.0034 N.ar 1 <0> -0.3058 + 20 C15 -2.6948 4.7147 -6.4799 C.ar 1 <0> 0.1004 + 21 N5 -3.8752 4.7586 -7.0542 N.ar 1 <0> -0.2437 + 22 C16 -4.3619 3.5300 -6.9560 C.ar 1 <0> 0.0458 + 23 C17 -0.0730 1.9990 -3.4623 C.ar 1 <0> 0.0917 + 24 N6 2.1001 1.8299 -3.1695 N.ar 1 <0> -0.3375 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 <0> 0.1463 + 26 N7 0.1058 1.3180 -2.2829 N.ar 1 <0> -0.2274 + 27 C19 1.1720 2.3377 -4.0490 C.ar 1 <0> 0.0695 + 28 C20 1.2633 3.0344 -5.2558 C.ar 1 <0> -0.0231 + 29 C21 0.0605 3.3904 -5.8723 C.ar 1 <0> -0.0328 + 30 C22 2.5855 3.3886 -5.8692 C.3 1 <0> -0.0376 + 31 H1 -0.5050 0.8747 0.0000 H 1 <0> 0.1465 + 32 H2 5.2340 0.0309 -0.0536 H 1 <0> 0.1683 + 33 H3 1.6865 -1.1065 1.9164 H 1 <0> 0.0651 + 34 H4 4.2081 -1.0140 1.7562 H 1 <0> 0.0808 + 35 H5 -1.6824 -1.1340 0.5141 H 1 <0> 0.0526 + 36 H6 -0.1409 -2.0239 0.5143 H 1 <0> 0.0526 + 37 H7 0.3912 -3.5176 -3.9304 H 1 <0> 0.0619 + 38 H8 0.8462 -2.8212 -1.5828 H 1 <0> 0.0636 + 39 H9 -1.7169 -2.7490 -5.0286 H 1 <0> 0.0633 + 40 H10 -3.2841 -1.3150 -3.7334 H 1 <0> 0.0829 + 41 H11 -3.5216 1.6294 -6.0449 H 1 <0> 0.0829 + 42 H12 -2.2176 2.1216 -3.6747 H 1 <0> 0.0660 + 43 H13 -1.9881 5.5298 -6.3621 H 1 <0> 0.1032 + 44 H14 -5.3359 3.2588 -7.3473 H 1 <0> 0.0845 + 45 H15 3.1139 1.8582 -3.2185 H 1 <0> 0.1674 + 46 H16 0.0865 3.9316 -6.8098 H 1 <0> 0.0641 + 47 H17 2.4205 3.9273 -6.8023 H 1 <0> 0.0278 + 48 H18 3.1484 4.0187 -5.1806 H 1 <0> 0.0278 + 49 H19 3.1484 2.4772 -6.0706 H 1 <0> 0.0278 +@BOND + 1 1 2 2 + 2 1 4 1 + 3 1 15 1 + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 1 + 8 4 32 1 + 9 5 6 1 + 10 5 15 2 + 11 6 7 2 + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 2 + 18 9 11 1 + 19 9 37 1 + 20 10 12 1 + 21 10 38 1 + 22 11 14 2 + 23 11 39 1 + 24 12 13 2 + 25 13 14 1 + 26 14 40 1 + 27 15 25 1 + 28 16 19 1 + 29 16 22 2 + 30 16 41 1 + 31 17 18 1 + 32 17 19 1 + 33 17 29 2 + 34 18 23 1 + 35 18 42 1 + 36 19 20 1 + 37 20 21 2 + 38 20 43 1 + 39 21 22 1 + 40 22 44 1 + 41 23 26 2 + 42 23 27 1 + 43 24 25 2 + 44 24 27 1 + 45 24 45 1 + 46 25 26 1 + 47 27 28 2 + 48 28 29 1 + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 +@MOLECULE +Structure6 + 49 53 0 0 0 +SMALL +USER_CHARGES + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 <0> 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 <0> -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 <0> -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 <0> -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 <0> 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 <0> -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 <0> 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 <0> 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 <0> -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 <0> -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 <0> -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 <0> 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 <0> -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 <0> 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 <0> 0.1022 + 16 C12 0.8141 5.2498 -7.4277 C.ar 1 <0> 0.0293 + 17 C13 0.0605 3.3904 -5.8723 C.ar 1 <0> 0.0498 + 18 C14 1.2633 3.0344 -5.2558 C.ar 1 <0> -0.0123 + 19 N4 0.0956 4.1202 -7.1364 N.ar 1 <0> -0.3058 + 20 C15 -0.5921 3.8054 -8.2773 C.ar 1 <0> 0.1004 + 21 N5 -0.3526 4.6548 -9.2502 N.ar 1 <0> -0.2437 + 22 C16 0.4995 5.5366 -8.7477 C.ar 1 <0> 0.0458 + 23 C17 1.1720 2.3377 -4.0490 C.ar 1 <0> 0.0687 + 24 N6 0.1058 1.3180 -2.2829 N.ar 1 <0> -0.2272 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 <0> 0.1463 + 26 N7 2.1001 1.8299 -3.1695 N.ar 1 <0> -0.3376 + 27 C19 -0.0730 1.9990 -3.4623 C.ar 1 <0> 0.0925 + 28 C20 -1.2589 2.3705 -4.1059 C.ar 1 <0> -0.0211 + 29 C21 -1.1754 3.0676 -5.3133 C.ar 1 <0> -0.0328 + 30 C22 -2.5901 2.0249 -3.5073 C.3 1 <0> -0.0376 + 31 H1 -0.5050 0.8747 0.0000 H 1 <0> 0.1465 + 32 H2 5.2340 0.0309 -0.0536 H 1 <0> 0.1683 + 33 H3 1.6865 -1.1065 1.9164 H 1 <0> 0.0651 + 34 H4 4.2081 -1.0140 1.7562 H 1 <0> 0.0808 + 35 H5 -1.6824 -1.1340 0.5141 H 1 <0> 0.0526 + 36 H6 -0.1409 -2.0239 0.5143 H 1 <0> 0.0526 + 37 H7 0.3912 -3.5176 -3.9304 H 1 <0> 0.0619 + 38 H8 0.8462 -2.8212 -1.5828 H 1 <0> 0.0636 + 39 H9 -1.7169 -2.7490 -5.0286 H 1 <0> 0.0633 + 40 H10 -3.2841 -1.3150 -3.7334 H 1 <0> 0.0829 + 41 H11 1.4583 5.7290 -6.7003 H 1 <0> 0.0829 + 42 H12 2.2166 3.2898 -5.6981 H 1 <0> 0.0659 + 43 H13 -1.2438 2.9382 -8.3047 H 1 <0> 0.1032 + 44 H14 0.8829 6.3685 -9.3275 H 1 <0> 0.0845 + 45 H20 3.1139 1.8582 -3.2185 H 1 <0> 0.1674 + 46 H16 -2.0826 3.3631 -5.8250 H 1 <0> 0.0641 + 47 H17 -3.3861 2.3972 -4.1521 H 1 <0> 0.0278 + 48 H18 -2.6758 0.9424 -3.4123 H 1 <0> 0.0278 + 49 H19 -2.6758 2.4839 -2.5223 H 1 <0> 0.0278 +@BOND + 1 1 2 2 + 2 1 4 1 + 3 1 15 1 + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 1 + 8 4 32 1 + 9 5 6 1 + 10 5 15 2 + 11 6 7 2 + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 2 + 18 9 11 1 + 19 9 37 1 + 20 10 12 1 + 21 10 38 1 + 22 11 14 2 + 23 11 39 1 + 24 12 13 2 + 25 13 14 1 + 26 14 40 1 + 27 15 25 1 + 28 16 19 1 + 29 16 22 2 + 30 16 41 1 + 31 17 18 1 + 32 17 19 1 + 33 17 29 2 + 34 18 23 1 + 35 18 42 1 + 36 19 20 1 + 37 20 21 2 + 38 20 43 1 + 39 21 22 1 + 40 22 44 1 + 41 23 26 2 + 42 23 27 1 + 43 24 25 2 + 44 24 27 1 + 45 25 26 1 + 46 26 45 1 + 47 27 28 2 + 48 28 29 1 + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/bmi/input/bmi_t1.mol2 b/examples/tautomers/bmi/input/bmi_t1.mol2 new file mode 100644 index 0000000..f356fda --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_t1.mol2 @@ -0,0 +1,111 @@ +@MOLECULE +Structure5 + 49 53 0 0 0 +SMALL +GASTEIGER +**** +Structure written by MMmdl. +@ATOM + 1 C 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C -3.4790 2.6799 -6.3069 C.ar 1 UNL1 0.0293 + 17 C -1.1754 3.0676 -5.3133 C.ar 1 UNL1 0.0498 + 18 C -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0103 + 19 N -2.3988 3.4661 -6.0034 N.ar 1 UNL1 -0.3058 + 20 C -2.6948 4.7147 -6.4799 C.ar 1 UNL1 0.1004 + 21 N -3.8752 4.7586 -7.0542 N.ar 1 UNL1 -0.2437 + 22 C -4.3619 3.5300 -6.9560 C.ar 1 UNL1 0.0458 + 23 C -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0917 + 24 N 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3375 + 25 C 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2274 + 27 C 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0695 + 28 C 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0231 + 29 C 0.0605 3.3904 -5.8723 C.ar 1 UNL1 -0.0328 + 30 C 2.5855 3.3886 -5.8692 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H -3.5216 1.6294 -6.0449 H 1 UNL1 0.0829 + 42 H -2.2176 2.1216 -3.6747 H 1 UNL1 0.0660 + 43 H -1.9881 5.5298 -6.3621 H 1 UNL1 0.1032 + 44 H -5.3359 3.2588 -7.3473 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H 0.0865 3.9316 -6.8098 H 1 UNL1 0.0641 + 47 H 2.4205 3.9273 -6.8023 H 1 UNL1 0.0278 + 48 H 3.1484 4.0187 -5.1806 H 1 UNL1 0.0278 + 49 H 3.1484 2.4772 -6.0706 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 24 45 1 + 46 25 26 ar + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/bmi/input/bmi_t1.sdf b/examples/tautomers/bmi/input/bmi_t1.sdf new file mode 100644 index 0000000..e25a415 --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_t1.sdf @@ -0,0 +1,120 @@ +Structure5 + 3D + Structure written by MMmdl. + 49 53 0 0 1 0 999 V2000 + 3.6090 0.6174 -1.0694 C 0 0 0 0 0 0 + 4.2430 1.1446 -1.9825 O 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 + 4.2256 0.0104 -0.0180 N 0 0 0 0 0 0 + 1.4600 0.0000 0.0000 C 0 0 0 0 0 0 + 2.1963 -0.6286 1.0888 C 0 0 0 0 0 0 + 3.5428 -0.5862 1.0154 C 0 0 0 0 0 0 + -0.7300 -1.2644 0.0003 C 0 0 0 0 0 0 + -0.3124 -2.8932 -3.3965 C 0 0 0 0 0 0 + -0.0587 -2.5026 -2.0819 C 0 0 0 0 0 0 + -1.4876 -2.4654 -4.0105 C 0 0 0 0 0 0 + -0.9800 -1.6978 -1.4138 C 0 0 0 0 0 0 + -2.1281 -1.2743 -2.0000 N 0 0 0 0 0 0 + -2.3628 -1.6620 -3.2808 C 0 0 0 0 0 0 + 2.1491 0.5849 -1.0130 C 0 0 0 0 0 0 + -3.4790 2.6799 -6.3069 C 0 0 0 0 0 0 + -1.1754 3.0676 -5.3133 C 0 0 0 0 0 0 + -1.2589 2.3705 -4.1059 C 0 0 0 0 0 0 + -2.3988 3.4661 -6.0034 N 0 0 0 0 0 0 + -2.6948 4.7147 -6.4799 C 0 0 0 0 0 0 + -3.8752 4.7586 -7.0542 N 0 0 0 0 0 0 + -4.3619 3.5300 -6.9560 C 0 0 0 0 0 0 + -0.0730 1.9990 -3.4623 C 0 0 0 0 0 0 + 2.1001 1.8299 -3.1695 N 0 0 0 0 0 0 + 1.4071 1.2367 -2.1420 C 0 0 0 0 0 0 + 0.1058 1.3180 -2.2829 N 0 0 0 0 0 0 + 1.1720 2.3377 -4.0490 C 0 0 0 0 0 0 + 1.2633 3.0344 -5.2558 C 0 0 0 0 0 0 + 0.0605 3.3904 -5.8723 C 0 0 0 0 0 0 + 2.5855 3.3886 -5.8692 C 0 0 0 0 0 0 + -0.5050 0.8747 0.0000 H 0 0 0 0 0 0 + 5.2340 0.0309 -0.0536 H 0 0 0 0 0 0 + 1.6865 -1.1065 1.9164 H 0 0 0 0 0 0 + 4.2081 -1.0140 1.7562 H 0 0 0 0 0 0 + -1.6824 -1.1340 0.5141 H 0 0 0 0 0 0 + -0.1409 -2.0239 0.5143 H 0 0 0 0 0 0 + 0.3912 -3.5176 -3.9304 H 0 0 0 0 0 0 + 0.8462 -2.8212 -1.5828 H 0 0 0 0 0 0 + -1.7169 -2.7490 -5.0286 H 0 0 0 0 0 0 + -3.2841 -1.3150 -3.7334 H 0 0 0 0 0 0 + -3.5216 1.6294 -6.0449 H 0 0 0 0 0 0 + -2.2176 2.1216 -3.6747 H 0 0 0 0 0 0 + -1.9881 5.5298 -6.3621 H 0 0 0 0 0 0 + -5.3359 3.2588 -7.3473 H 0 0 0 0 0 0 + 3.1139 1.8582 -3.2185 H 0 0 0 0 0 0 + 0.0865 3.9316 -6.8098 H 0 0 0 0 0 0 + 2.4205 3.9273 -6.8023 H 0 0 0 0 0 0 + 3.1484 4.0187 -5.1806 H 0 0 0 0 0 0 + 3.1484 2.4772 -6.0706 H 0 0 0 0 0 0 + 1 2 2 0 0 0 + 1 4 1 0 0 0 + 1 15 1 0 0 0 + 3 5 1 0 0 0 + 3 8 1 0 0 0 + 3 31 1 0 0 0 + 4 7 1 0 0 0 + 4 32 1 0 0 0 + 5 6 1 0 0 0 + 5 15 2 0 0 0 + 6 7 2 0 0 0 + 6 33 1 0 0 0 + 7 34 1 0 0 0 + 8 12 1 0 0 0 + 8 35 1 0 0 0 + 8 36 1 0 0 0 + 9 10 1 0 0 0 + 9 11 2 0 0 0 + 9 37 1 0 0 0 + 10 12 2 0 0 0 + 10 38 1 0 0 0 + 11 14 1 0 0 0 + 11 39 1 0 0 0 + 12 13 1 0 0 0 + 13 14 2 0 0 0 + 14 40 1 0 0 0 + 15 25 1 0 0 0 + 16 19 1 0 0 0 + 16 22 2 0 0 0 + 16 41 1 0 0 0 + 17 18 1 0 0 0 + 17 19 1 0 0 0 + 17 29 2 0 0 0 + 18 23 2 0 0 0 + 18 42 1 0 0 0 + 19 20 1 0 0 0 + 20 21 2 0 0 0 + 20 43 1 0 0 0 + 21 22 1 0 0 0 + 22 44 1 0 0 0 + 23 26 1 0 0 0 + 23 27 1 0 0 0 + 24 25 1 0 0 0 + 24 27 1 0 0 0 + 24 45 1 0 0 0 + 25 26 2 0 0 0 + 27 28 2 0 0 0 + 28 29 1 0 0 0 + 28 30 1 0 0 0 + 29 46 1 0 0 0 + 30 47 1 0 0 0 + 30 48 1 0 0 0 + 30 49 1 0 0 0 +M END +> +5 + +> +entry + +> +0 + +> +0 + +$$$$ diff --git a/examples/tautomers/bmi/input/bmi_t2.mol2 b/examples/tautomers/bmi/input/bmi_t2.mol2 new file mode 100644 index 0000000..aaae856 --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_t2.mol2 @@ -0,0 +1,111 @@ +@MOLECULE +Structure6 + 49 53 0 0 0 +SMALL +GASTEIGER +**** +Structure written by MMmdl. +@ATOM + 1 C 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C 0.8141 5.2498 -7.4277 C.ar 1 UNL1 0.0293 + 17 C 0.0605 3.3904 -5.8723 C.ar 1 UNL1 0.0498 + 18 C 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0123 + 19 N 0.0956 4.1202 -7.1364 N.ar 1 UNL1 -0.3058 + 20 C -0.5921 3.8054 -8.2773 C.ar 1 UNL1 0.1004 + 21 N -0.3526 4.6548 -9.2502 N.ar 1 UNL1 -0.2437 + 22 C 0.4995 5.5366 -8.7477 C.ar 1 UNL1 0.0458 + 23 C 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0687 + 24 N 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2272 + 25 C 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3376 + 27 C -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0925 + 28 C -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0211 + 29 C -1.1754 3.0676 -5.3133 C.ar 1 UNL1 -0.0328 + 30 C -2.5901 2.0249 -3.5073 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H 1.4583 5.7290 -6.7003 H 1 UNL1 0.0829 + 42 H 2.2166 3.2898 -5.6981 H 1 UNL1 0.0659 + 43 H -1.2438 2.9382 -8.3047 H 1 UNL1 0.1032 + 44 H 0.8829 6.3685 -9.3275 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H -2.0826 3.3631 -5.8250 H 1 UNL1 0.0641 + 47 H -3.3861 2.3972 -4.1521 H 1 UNL1 0.0278 + 48 H -2.6758 0.9424 -3.4123 H 1 UNL1 0.0278 + 49 H -2.6758 2.4839 -2.5223 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 25 26 ar + 46 26 45 1 + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/bmi/input/bmi_t2.sdf b/examples/tautomers/bmi/input/bmi_t2.sdf new file mode 100644 index 0000000..c990ebf --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_t2.sdf @@ -0,0 +1,120 @@ +Structure6 + 3D + Structure written by MMmdl. + 49 53 0 0 1 0 999 V2000 + 3.6090 0.6174 -1.0694 C 0 0 0 0 0 0 + 4.2430 1.1446 -1.9825 O 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 + 4.2256 0.0104 -0.0180 N 0 0 0 0 0 0 + 1.4600 0.0000 0.0000 C 0 0 0 0 0 0 + 2.1963 -0.6286 1.0888 C 0 0 0 0 0 0 + 3.5428 -0.5862 1.0154 C 0 0 0 0 0 0 + -0.7300 -1.2644 0.0003 C 0 0 0 0 0 0 + -0.3124 -2.8932 -3.3965 C 0 0 0 0 0 0 + -0.0587 -2.5026 -2.0819 C 0 0 0 0 0 0 + -1.4876 -2.4654 -4.0105 C 0 0 0 0 0 0 + -0.9800 -1.6978 -1.4138 C 0 0 0 0 0 0 + -2.1281 -1.2743 -2.0000 N 0 0 0 0 0 0 + -2.3628 -1.6620 -3.2808 C 0 0 0 0 0 0 + 2.1491 0.5849 -1.0130 C 0 0 0 0 0 0 + 0.8141 5.2498 -7.4277 C 0 0 0 0 0 0 + 0.0605 3.3904 -5.8723 C 0 0 0 0 0 0 + 1.2633 3.0344 -5.2558 C 0 0 0 0 0 0 + 0.0956 4.1202 -7.1364 N 0 0 0 0 0 0 + -0.5921 3.8054 -8.2773 C 0 0 0 0 0 0 + -0.3526 4.6548 -9.2502 N 0 0 0 0 0 0 + 0.4995 5.5366 -8.7477 C 0 0 0 0 0 0 + 1.1720 2.3377 -4.0490 C 0 0 0 0 0 0 + 0.1058 1.3180 -2.2829 N 0 0 0 0 0 0 + 1.4071 1.2367 -2.1420 C 0 0 0 0 0 0 + 2.1001 1.8299 -3.1695 N 0 0 0 0 0 0 + -0.0730 1.9990 -3.4623 C 0 0 0 0 0 0 + -1.2589 2.3705 -4.1059 C 0 0 0 0 0 0 + -1.1754 3.0676 -5.3133 C 0 0 0 0 0 0 + -2.5901 2.0249 -3.5073 C 0 0 0 0 0 0 + -0.5050 0.8747 0.0000 H 0 0 0 0 0 0 + 5.2340 0.0309 -0.0536 H 0 0 0 0 0 0 + 1.6865 -1.1065 1.9164 H 0 0 0 0 0 0 + 4.2081 -1.0140 1.7562 H 0 0 0 0 0 0 + -1.6824 -1.1340 0.5141 H 0 0 0 0 0 0 + -0.1409 -2.0239 0.5143 H 0 0 0 0 0 0 + 0.3912 -3.5176 -3.9304 H 0 0 0 0 0 0 + 0.8462 -2.8212 -1.5828 H 0 0 0 0 0 0 + -1.7169 -2.7490 -5.0286 H 0 0 0 0 0 0 + -3.2841 -1.3150 -3.7334 H 0 0 0 0 0 0 + 1.4583 5.7290 -6.7003 H 0 0 0 0 0 0 + 2.2166 3.2898 -5.6981 H 0 0 0 0 0 0 + -1.2438 2.9382 -8.3047 H 0 0 0 0 0 0 + 0.8829 6.3685 -9.3275 H 0 0 0 0 0 0 + 3.1139 1.8582 -3.2185 H 0 0 0 0 0 0 + -2.0826 3.3631 -5.8250 H 0 0 0 0 0 0 + -3.3861 2.3972 -4.1521 H 0 0 0 0 0 0 + -2.6758 0.9424 -3.4123 H 0 0 0 0 0 0 + -2.6758 2.4839 -2.5223 H 0 0 0 0 0 0 + 1 2 2 0 0 0 + 1 4 1 0 0 0 + 1 15 1 0 0 0 + 3 5 1 0 0 0 + 3 8 1 0 0 0 + 3 31 1 0 0 0 + 4 7 1 0 0 0 + 4 32 1 0 0 0 + 5 6 1 0 0 0 + 5 15 2 0 0 0 + 6 7 2 0 0 0 + 6 33 1 0 0 0 + 7 34 1 0 0 0 + 8 12 1 0 0 0 + 8 35 1 0 0 0 + 8 36 1 0 0 0 + 9 10 1 0 0 0 + 9 11 2 0 0 0 + 9 37 1 0 0 0 + 10 12 2 0 0 0 + 10 38 1 0 0 0 + 11 14 1 0 0 0 + 11 39 1 0 0 0 + 12 13 1 0 0 0 + 13 14 2 0 0 0 + 14 40 1 0 0 0 + 15 25 1 0 0 0 + 16 19 1 0 0 0 + 16 22 2 0 0 0 + 16 41 1 0 0 0 + 17 18 1 0 0 0 + 17 19 1 0 0 0 + 17 29 2 0 0 0 + 18 23 2 0 0 0 + 18 42 1 0 0 0 + 19 20 1 0 0 0 + 20 21 2 0 0 0 + 20 43 1 0 0 0 + 21 22 1 0 0 0 + 22 44 1 0 0 0 + 23 26 1 0 0 0 + 23 27 1 0 0 0 + 24 25 2 0 0 0 + 24 27 1 0 0 0 + 25 26 1 0 0 0 + 26 45 1 0 0 0 + 27 28 2 0 0 0 + 28 29 1 0 0 0 + 28 30 1 0 0 0 + 29 46 1 0 0 0 + 30 47 1 0 0 0 + 30 48 1 0 0 0 + 30 49 1 0 0 0 +M END +> +6 + +> +entry + +> +0 + +> +0 + +$$$$ diff --git a/examples/tautomers/bmi/input/bmi_tautomer_set.mol2 b/examples/tautomers/bmi/input/bmi_tautomer_set.mol2 new file mode 100644 index 0000000..af93110 --- /dev/null +++ b/examples/tautomers/bmi/input/bmi_tautomer_set.mol2 @@ -0,0 +1,221 @@ +@MOLECULE +Structure5 + 49 53 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C12 -3.4790 2.6799 -6.3069 C.ar 1 UNL1 0.0293 + 17 C13 -1.1754 3.0676 -5.3133 C.ar 1 UNL1 0.0498 + 18 C14 -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0103 + 19 N4 -2.3988 3.4661 -6.0034 N.ar 1 UNL1 -0.3058 + 20 C15 -2.6948 4.7147 -6.4799 C.ar 1 UNL1 0.1004 + 21 N5 -3.8752 4.7586 -7.0542 N.ar 1 UNL1 -0.2437 + 22 C16 -4.3619 3.5300 -6.9560 C.ar 1 UNL1 0.0458 + 23 C17 -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0917 + 24 N6 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3375 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N7 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2274 + 27 C19 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0695 + 28 C20 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0231 + 29 C21 0.0605 3.3904 -5.8723 C.ar 1 UNL1 -0.0328 + 30 C22 2.5855 3.3886 -5.8692 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H -3.5216 1.6294 -6.0449 H 1 UNL1 0.0829 + 42 H -2.2176 2.1216 -3.6747 H 1 UNL1 0.0660 + 43 H -1.9881 5.5298 -6.3621 H 1 UNL1 0.1032 + 44 H -5.3359 3.2588 -7.3473 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H 0.0865 3.9316 -6.8098 H 1 UNL1 0.0641 + 47 H 2.4205 3.9273 -6.8023 H 1 UNL1 0.0278 + 48 H 3.1484 4.0187 -5.1806 H 1 UNL1 0.0278 + 49 H 3.1484 2.4772 -6.0706 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 24 45 1 + 46 25 26 ar + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 + +@MOLECULE +Structure6 + 49 53 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C1 3.6090 0.6174 -1.0694 C.ar 1 UNL1 0.2612 + 2 O1 4.2430 1.1446 -1.9825 O.2 1 UNL1 -0.2671 + 3 N1 0.0000 0.0000 0.0000 N.pl3 1 UNL1 -0.3375 + 4 N2 4.2256 0.0104 -0.0180 N.ar 1 UNL1 -0.3284 + 5 C2 1.4600 0.0000 0.0000 C.ar 1 UNL1 0.0448 + 6 C3 2.1963 -0.6286 1.0888 C.ar 1 UNL1 -0.0239 + 7 C4 3.5428 -0.5862 1.0154 C.ar 1 UNL1 0.0067 + 8 C5 -0.7300 -1.2644 0.0003 C.3 1 UNL1 0.0515 + 9 C6 -0.3124 -2.8932 -3.3965 C.ar 1 UNL1 -0.0586 + 10 C7 -0.0587 -2.5026 -2.0819 C.ar 1 UNL1 -0.0388 + 11 C8 -1.4876 -2.4654 -4.0105 C.ar 1 UNL1 -0.0436 + 12 C9 -0.9800 -1.6978 -1.4138 C.ar 1 UNL1 0.0582 + 13 N3 -2.1281 -1.2743 -2.0000 N.ar 1 UNL1 -0.2583 + 14 C10 -2.3628 -1.6620 -3.2808 C.ar 1 UNL1 0.0279 + 15 C11 2.1491 0.5849 -1.0130 C.ar 1 UNL1 0.1022 + 16 C12 0.8141 5.2498 -7.4277 C.ar 1 UNL1 0.0293 + 17 C13 0.0605 3.3904 -5.8723 C.ar 1 UNL1 0.0498 + 18 C14 1.2633 3.0344 -5.2558 C.ar 1 UNL1 -0.0123 + 19 N4 0.0956 4.1202 -7.1364 N.ar 1 UNL1 -0.3058 + 20 C15 -0.5921 3.8054 -8.2773 C.ar 1 UNL1 0.1004 + 21 N5 -0.3526 4.6548 -9.2502 N.ar 1 UNL1 -0.2437 + 22 C16 0.4995 5.5366 -8.7477 C.ar 1 UNL1 0.0458 + 23 C17 1.1720 2.3377 -4.0490 C.ar 1 UNL1 0.0687 + 24 N6 0.1058 1.3180 -2.2829 N.ar 1 UNL1 -0.2272 + 25 C18 1.4071 1.2367 -2.1420 C.ar 1 UNL1 0.1463 + 26 N7 2.1001 1.8299 -3.1695 N.ar 1 UNL1 -0.3376 + 27 C19 -0.0730 1.9990 -3.4623 C.ar 1 UNL1 0.0925 + 28 C20 -1.2589 2.3705 -4.1059 C.ar 1 UNL1 -0.0211 + 29 C21 -1.1754 3.0676 -5.3133 C.ar 1 UNL1 -0.0328 + 30 C22 -2.5901 2.0249 -3.5073 C.3 1 UNL1 -0.0376 + 31 H -0.5050 0.8747 0.0000 H 1 UNL1 0.1465 + 32 H 5.2340 0.0309 -0.0536 H 1 UNL1 0.1683 + 33 H 1.6865 -1.1065 1.9164 H 1 UNL1 0.0651 + 34 H 4.2081 -1.0140 1.7562 H 1 UNL1 0.0808 + 35 H -1.6824 -1.1340 0.5141 H 1 UNL1 0.0526 + 36 H -0.1409 -2.0239 0.5143 H 1 UNL1 0.0526 + 37 H 0.3912 -3.5176 -3.9304 H 1 UNL1 0.0619 + 38 H 0.8462 -2.8212 -1.5828 H 1 UNL1 0.0636 + 39 H -1.7169 -2.7490 -5.0286 H 1 UNL1 0.0633 + 40 H -3.2841 -1.3150 -3.7334 H 1 UNL1 0.0829 + 41 H 1.4583 5.7290 -6.7003 H 1 UNL1 0.0829 + 42 H 2.2166 3.2898 -5.6981 H 1 UNL1 0.0659 + 43 H -1.2438 2.9382 -8.3047 H 1 UNL1 0.1032 + 44 H 0.8829 6.3685 -9.3275 H 1 UNL1 0.0845 + 45 H 3.1139 1.8582 -3.2185 H 1 UNL1 0.1674 + 46 H -2.0826 3.3631 -5.8250 H 1 UNL1 0.0641 + 47 H -3.3861 2.3972 -4.1521 H 1 UNL1 0.0278 + 48 H -2.6758 0.9424 -3.4123 H 1 UNL1 0.0278 + 49 H -2.6758 2.4839 -2.5223 H 1 UNL1 0.0278 +@BOND + 1 1 2 2 + 2 1 4 ar + 3 1 15 ar + 4 3 5 1 + 5 3 8 1 + 6 3 31 1 + 7 4 7 ar + 8 4 32 1 + 9 5 6 ar + 10 5 15 ar + 11 6 7 ar + 12 6 33 1 + 13 7 34 1 + 14 8 12 1 + 15 8 35 1 + 16 8 36 1 + 17 9 10 ar + 18 9 11 ar + 19 9 37 1 + 20 10 12 ar + 21 10 38 1 + 22 11 14 ar + 23 11 39 1 + 24 12 13 ar + 25 13 14 ar + 26 14 40 1 + 27 15 25 1 + 28 16 19 ar + 29 16 22 ar + 30 16 41 1 + 31 17 18 ar + 32 17 19 1 + 33 17 29 ar + 34 18 23 ar + 35 18 42 1 + 36 19 20 ar + 37 20 21 ar + 38 20 43 1 + 39 21 22 ar + 40 22 44 1 + 41 23 26 ar + 42 23 27 ar + 43 24 25 ar + 44 24 27 ar + 45 25 26 ar + 46 26 45 1 + 47 27 28 ar + 48 28 29 ar + 49 28 30 1 + 50 29 46 1 + 51 30 47 1 + 52 30 48 1 + 53 30 49 1 diff --git a/examples/tautomers/keto-enol.json b/examples/tautomers/keto-enol.json new file mode 100644 index 0000000..f75175f --- /dev/null +++ b/examples/tautomers/keto-enol.json @@ -0,0 +1,62 @@ +{ + + "_comment": "This script generates a necessary checkpoint file to start a calibration.", + "name" : "keto_enol", + "input": { + "_comment": "Simulation requires an mmCIF file and a ffxml residue. Please specify the input directory under dir.", + "dir": "/home/mwieder/Work/Projects/protons-dev/protons/examples/tautomers/keto_enol/input/", + "structure": "keto_enol.cif" + }, + "output": { + "dir": "/home/mwieder/Work/Projects/protons-dev/protons/examples/tautomers/keto_enol/output/" + + }, + "forcefield": { + "_comment1": "Standard, included xml files. Don't forget -obc2 files if using implicit.", + "default": ["amber10-constph.xml", "gaff.xml", "tip3p.xml"], + "_comment2": "Custom generated xml file (needs to be in input dir.", + "user": ["keto_enol.xml"] + }, + "format_vars": { + "_comment1": "These variables are filled into file names for input and output when you use {} style syntax.", + "name": "1D" + }, + "system": { + "_comment1": "Systemwide settings, such as temperature, and long range method", + "_comment2": "If PME left out, nocutoff is used.", + "PME": { + "_comment": "Ewald + periodic system settings", + "ewald_error_tolerance": 1.0e-5, + "switching_distance_nm": 0.85, + "nonbonded_cutoff_nm": 1.0, + "barostat_interval": 25, + "pressure_atm": 1.0 + }, + "temperature_k": 300.0, + "salt_concentration_molar": 0.150 + }, + "integrator": { + "timestep_fs": 2.0, + "constraint_tolerance": 1.0e-7, + "collision_rate_per_ps": 1.0 + }, + "preprocessing": { + "minimization_tolerance_kjmol": 1.0e-5, + "minimization_max_iterations": 300, + "num_thermalization_steps": 100 + }, + "SAMS": { + "beta": 0.6, + "flatness_criterion": 0.1, + "sites": "multi", + "update_rule": "binary", + "min_burn": 200, + "min_slow": 200, + "min_fast": 200 + }, + "simulation" : { + "dcd_filename" : "keto_enol.dcd" , + "netCDF4" : "keto_enol.nc" + } + +} diff --git a/examples/tautomers/tautomer_utils.py b/examples/tautomers/tautomer_utils.py new file mode 100644 index 0000000..de28e13 --- /dev/null +++ b/examples/tautomers/tautomer_utils.py @@ -0,0 +1,348 @@ +from protons.app.ligands import prepare_calibration_systems, create_hydrogen_definitions, generate_protons_ffxml, prepare_mol2_for_parametrization +from protons.scripts.utilities import * +from protons.app import TautomerForceFieldProtonDrive, TautomerNCMCProtonDrive +from protons.app import MetadataReporter, TitrationReporter, NCMCReporter, SAMSReporter +from simtk import unit, openmm as mm +from saltswap.wrappers import Salinator +from protons.app.driver import SAMSApproach, Stage, UpdateRule +from tqdm import trange +import netCDF4 +import shutil +import os + + +def run_main(simulation, driver, pdb_object, settings): + """Main simulation loop.""" + + + # Add reporters + ncfile = netCDF4.Dataset(settings['simulation']['netCDF4'], "w") + dcd_output_name = settings['simulation']['dcd_filename'] + simulation.update_reporters.append(app.MetadataReporter(ncfile)) + simulation.reporters.append(app.DCDReporter(dcd_output_name, 100, enforcePeriodicBox=True)) + simulation.update_reporters.append(app.TitrationReporter(ncfile, 1)) + simulation.calibration_reporters.append(app.SAMSReporter(ncfile, 1)) + simulation.update_reporters.append(app.NCMCReporter(ncfile, 1, 0)) + total_iterations = 1000 + md_steps_between_updates = 1000 + + # MAIN SIMULATION LOOP STARTS HERE + pos = simulation.context.getState(getPositions=True).getPositions() + mm.app.PDBFile.writeFile(pdb_object.topology, pos, open(settings['output']['dir'] + '/tmp/start.pdb', 'w')) + + for i in trange(total_iterations, desc="NCMC attempts"): + if i == 50: + log.info("Simulation seems to be working. Suppressing debugging info.") + log.setLevel(log.INFO) + simulation.step(md_steps_between_updates) + # Perform a few COOH updates in between + driver.update("COOH", nattempts=3) + pos = simulation.context.getState(getPositions=True).getPositions() + mm.app.PDBFile.writeFile(pdb_object.topology, pos, open(settings['output']['dir'] + '/tmp/mcmc_'+str(i)+'.pdb', 'w')) + + if driver.calibration_state is not None: + if driver.calibration_state.approach is SAMSApproach.ONESITE: + simulation.update(1, pool="calibration") + else: + simulation.update(1) + simulation.adapt() + else: + simulation.update(1) + + +def setting_up_tautomer(settings, isomer_dictionary): + + pH = settings['pH'] + resname = settings['resname'] + + #retrieve input files + icalib = settings['input']['dir'] + '/' + settings['name'] + '.pdb' + input_mol2 = settings['input']['dir'] + '/' + settings['name'] + '_tautomer_set.mol2' + + #define location of set up files + hydrogen_def = settings['input']['dir'] + '/' + settings['name'] + '.hxml' + offxml = settings['input']['dir'] + '/' + settings['name'] + '.ffxml' + ocalib = settings['input']['dir'] + '/' + settings['name'] + '.cif' + + # Debugging/intermediate files + dhydrogen_fix = settings['input']['dir'] + '/' + settings['name'] + '_hydrogen_fixed1.mol2' + + ofolder = settings['output']['dir'] + + if not os.path.isdir(ofolder): + os.makedirs(ofolder) + + # Begin processing + + # process into mol2 + prepare_mol2_for_parametrization(input_mol2, dhydrogen_fix, patch_bonds=True, keep_intermediate=True) + + # parametrize + generate_protons_ffxml(dhydrogen_fix, isomer_dictionary, offxml, pH, resname=resname, tautomers=True, pdb_file_path=icalib) + # create hydrogens + create_hydrogen_definitions(offxml, hydrogen_def, tautomers=True) + + # prepare solvated system + prepare_calibration_systems(icalib, ofolder, offxml, hydrogen_def) + +def generate_simulation_and_driver(settings): + + ofolder = settings['output']['dir'] + input_pdbx_file = settings['input']['dir'] + '/' + settings['name'] + '.cif' + custom_xml = settings['input']['dir'] + '/' + settings['name'] + '.ffxml' + forcefield = app.ForceField('amber10-constph.xml', 'gaff.xml', custom_xml, 'tip3p.xml', 'ions_tip3p.xml') + + # Load structure + # The input should be an mmcif/pdbx file' + pdb_object = app.PDBxFile(input_pdbx_file) + + # Atoms , connectivity, residues + topology = pdb_object.topology + + # XYZ positions for every atom + positions = pdb_object.positions + + # Quick fix for histidines in topology + # Openmm relabels them HIS, which leads to them not being detected as + # titratable. Renaming them fixes this. + + for residue in topology.residues(): + if residue.name == "HIS": + residue.name = "HIP" + # TODO doublecheck if ASH GLH need to be renamed + elif residue.name == "ASP": + residue.name = "ASH" + elif residue.name == "GLU": + residue.name = "GLH" + + if os.path.isdir(ofolder): + shutil.rmtree(ofolder) + os.makedirs(ofolder) + + # Naming the output files + os.chdir(ofolder) + + if not os.path.isdir('tmp'): + os.makedirs('tmp') + + + # Structure preprocessing settings + if "preprocessing" in settings: + preproc: Dict[str, Any] = settings["preprocessing"] + + # Steps of MD before starting the main loop + num_thermalization_steps = int(preproc["num_thermalization_steps"]) + pre_run_minimization_tolerance: unit.Quantity = float( + preproc["minimization_tolerance_kjmol"] + ) * unit.kilojoule / unit.mole + minimization_max_iterations = int(preproc["minimization_max_iterations"]) + + # System Configuration + sysprops = settings["system"] + temperature = float(sysprops["temperature_k"]) * unit.kelvin + if "salt_concentration_molar" in sysprops: + salt_concentration: unit.Quantity = float( + sysprops["salt_concentration_molar"] + ) * unit.molar + else: + salt_concentration = None + + rigidWater = True + constraints = app.HBonds + + if "PME" in sysprops: + pmeprops = sysprops["PME"] + nonbondedMethod = app.PME + ewaldErrorTolerance = float(pmeprops["ewald_error_tolerance"]) + barostatInterval = int(pmeprops["barostat_interval"]) + switching_distance = float(pmeprops["switching_distance_nm"]) * unit.nanometers + nonbondedCutoff = float(pmeprops["nonbonded_cutoff_nm"]) * unit.nanometers + pressure = float(pmeprops["pressure_atm"]) * unit.atmosphere + + system = forcefield.createSystem( + topology, + nonbondedMethod=nonbondedMethod, + constraints=constraints, + rigidWater=rigidWater, + ewaldErrorTolerance=ewaldErrorTolerance, + nonbondedCutoff=nonbondedCutoff, + ) + for force in system.getForces(): + if isinstance(force, mm.NonbondedForce): + force.setUseSwitchingFunction(True) + + force.setSwitchingDistance(switching_distance) + + # TODO disable in implicit solvent + # NPT simulation + system.addForce(mm.MonteCarloBarostat(pressure, temperature, barostatInterval)) + else: + pressure = None + system = forcefield.createSystem( + topology, + nonbondedMethod=app.NoCutoff, + constraints=app.HBonds, + rigidWater=True, + ) + + # Integrator options + integrator_opts = settings["integrator"] + timestep = integrator_opts["timestep_fs"] * unit.femtosecond + constraint_tolerance = integrator_opts["constraint_tolerance"] + collision_rate = integrator_opts["collision_rate_per_ps"] / unit.picosecond + number_R_steps = 1 + + integrator = ExternalGBAOABIntegrator( + number_R_steps=number_R_steps, + temperature=temperature, + collision_rate=collision_rate, + timestep=timestep, + constraint_tolerance=constraint_tolerance, + ) + ncmc_propagation_integrator = ExternalGBAOABIntegrator( + number_R_steps=number_R_steps, + temperature=temperature, + collision_rate=collision_rate, + timestep=timestep, + constraint_tolerance=constraint_tolerance, + ) + + # Define a compound integrator + compound_integrator = mm.CompoundIntegrator() + compound_integrator.addIntegrator(integrator) + compound_integrator.addIntegrator(ncmc_propagation_integrator) + compound_integrator.setCurrentIntegrator(0) + + # Script specific settings + + # Register the timeout handling + driver = TautomerForceFieldProtonDrive( + temperature, + topology, + system, + forcefield, + [custom_xml] + ["amber10-constph.xml"], + pressure=pressure, + perturbations_per_trial=10000, + propagations_per_step=1, + ) + + if "reference_free_energies" in settings: + # calibrated free energy values can be provided here + gk_dict = settings["reference_free_energies"] + + # Clean comments inside dictionary + to_delete = list() + for key in gk_dict.keys(): + if key.startswith("_"): + to_delete.append(key) + + for key in to_delete: + del (gk_dict[key]) + + # Make arrays + for key, value in gk_dict.items(): + # Convert to array of float + val_array = np.asarray(value).astype(np.float64) + if not val_array.ndim == 1: + raise ValueError("Reference free energies should be simple lists.") + gk_dict[key] = val_array + + driver.import_gk_values(gk_dict) + + # TODO allow platform specification for setup + + #platform = mm.Platform.getPlatformByName("CUDA") +# properties = {"Precision": "double"} + platform = mm.Platform.getPlatformByName("CPU") + + # Set up calibration mode + # SAMS settings + + if "SAMS" in settings: + sams = settings["SAMS"] + + beta_burnin = float(sams["beta"]) + min_burnin = int(sams["min_burn"]) + min_slow = int(sams["min_slow"]) + min_fast = int(sams["min_fast"]) + + flatness_criterion = float(sams["flatness_criterion"]) + if sams["update_rule"] == "binary": + update_rule = UpdateRule.BINARY + elif sams["update_rule"] == "global": + update_rule = UpdateRule.GLOBAL + else: + update_rule = UpdateRule.BINARY + + # Assumes calibration residue is always the last titration group if onesite + + if sams["sites"] == "multi": + driver.enable_calibration( + approach=SAMSApproach.MULTISITE, + update_rule=update_rule, + flatness_criterion=flatness_criterion, + min_burn=min_burnin, + min_slow=min_slow, + min_fast=min_fast, + beta_sams=beta_burnin, + ) + elif sams["sites"] == "one": + if "group_index" in sams: + calibration_titration_group_index = int(sams["group_index"]) + else: + calibration_titration_group_index = len(driver.titrationGroups) - 1 + + driver.enable_calibration( + approach=SAMSApproach.ONESITE, + group_index=calibration_titration_group_index, + update_rule=update_rule, + flatness_criterion=flatness_criterion, + min_burn=min_burnin, + min_slow=min_slow, + min_fast=min_fast, + ) + # Define residue pools + pools = {"calibration": [calibration_titration_group_index]} + # TODO the pooling feature could eventually be exposed in the json + driver.define_pools(pools) + properties = None + # Create simulation object + # If calibration is required, this class will automatically deal with it. + simulation = app.ConstantPHSimulation( + topology, + system, + compound_integrator, + driver, + platform=platform, + platformProperties=properties, + ) + simulation.context.setPositions(positions) + + # After the simulation system has been defined, we can add salt to the system using saltswap. + if salt_concentration is not None and "PME" in sysprops: + salinator = Salinator( + context=simulation.context, + system=system, + topology=topology, + ncmc_integrator=compound_integrator.getIntegrator(1), + salt_concentration=salt_concentration, + pressure=pressure, + temperature=temperature, + ) + salinator.neutralize() + salinator.initialize_concentration() + else: + salinator = None + + # Minimize the initial configuration to remove bad contacts + if "preprocessing" in settings: + simulation.minimizeEnergy( + tolerance=pre_run_minimization_tolerance, + maxIterations=minimization_max_iterations, + ) + # Slightly equilibrate the system, detect early issues. + simulation.step(num_thermalization_steps) + + topology_file_content = open(input_pdbx_file, "r").read() + return simulation, driver, pdb_object diff --git a/examples/tautomers/tautomers.ipynb b/examples/tautomers/tautomers.ipynb new file mode 100644 index 0000000..dbe9a77 --- /dev/null +++ b/examples/tautomers/tautomers.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[__init__.py:415 - wrapper() ] CACHEDIR=/home/mwieder/.cache/matplotlib\n", + "[font_manager.py:1362 - () ] Using fontManager instance from /home/mwieder/.cache/matplotlib/fontlist-v300.json\n", + "[pyplot.py:211 - switch_backend() ] Loaded backend module://ipykernel.pylab.backend_inline version unknown.\n", + "[pyplot.py:211 - switch_backend() ] Loaded backend module://ipykernel.pylab.backend_inline version unknown.\n" + ] + } + ], + "source": [ + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))\n", + "from protons.app import logger\n", + "from protons.app.logger import log\n", + "from protons import app\n", + "from simtk import openmm as mm\n", + "import logging\n", + "log.setLevel(logging.DEBUG)\n", + "import yaml\n", + "import tautomer_utils\n", + "from protons.app.driver import SAMSApproach, Stage, UpdateRule" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mwieder/anaconda3/envs/protons-dev/lib/python3.6/site-packages/ipykernel_launcher.py:1: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "settings = yaml.load(open('bmi.json', \"r\"))\n", + "settings['pH'] = 7.8\n", + "settings['resname'] = 'UNL'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[ligands.py:1598 - generate_protons_ffxml() ] Processing Epik output...\n", + "[ligands.py:1601 - generate_protons_ffxml() ] Parametrizing the isomers...\n", + "[ligands.py:1613 - generate_protons_ffxml() ] ffxml generation for 0\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "omega returned error code 0", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mt2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m \u001b[0;34m'log_population'\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m'1.0'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'net_charge'\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0misomer_dictionary\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mt1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mt2\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mtautomer_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetting_up_tautomer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0misomer_dictionary\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Work/Projects/protons-dev/protons/examples/tautomers/tautomer_utils.py\u001b[0m in \u001b[0;36msetting_up_tautomer\u001b[0;34m(settings, isomer_dictionary)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;31m# parametrize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0mgenerate_protons_ffxml\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdhydrogen_fix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0misomer_dictionary\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffxml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpH\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtautomers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpdb_file_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0micalib\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;31m# create hydrogens\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mcreate_hydrogen_definitions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moffxml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhydrogen_def\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtautomers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/protons-dev/lib/python3.6/site-packages/protons-0.0.1a6+20.g79ce0da.dirty-py3.6.egg/protons/app/ligands.py\u001b[0m in \u001b[0;36mgenerate_protons_ffxml\u001b[0;34m(inputmol2, isomer_dicts, outputffxml, pH, resname, omega_max_confs, tautomers, pdb_file_path)\u001b[0m\n\u001b[1;32m 1613\u001b[0m \u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ffxml generation for {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0misomer_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1614\u001b[0m ffxml = omtff.generateForceFieldFromMolecules(\n\u001b[0;32m-> 1615\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0moemolecule\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0momega_max_confs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0momega_max_confs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1616\u001b[0m )\n\u001b[1;32m 1617\u001b[0m \u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mffxml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/protons-dev/lib/python3.6/site-packages/protons-0.0.1a6+20.g79ce0da.dirty-py3.6.egg/protons/app/forcefield_generators.py\u001b[0m in \u001b[0;36mgenerateForceFieldFromMolecules\u001b[0;34m(molecules, ignoreFailures, generateUniqueNames, normalize, gaff_version, use_recommended, omega_max_confs)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 502\u001b[0m molecule = assignELF10charges(\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mmolecule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstrictStereo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_confs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnconf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m )\n\u001b[1;32m 505\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/protons-dev/lib/python3.6/site-packages/protons-0.0.1a6+20.g79ce0da.dirty-py3.6.egg/protons/app/openeye.py\u001b[0m in \u001b[0;36massignELF10charges\u001b[0;34m(molecule, max_confs, strictStereo)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;31m# Generate up to max_confs conformers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m mol_copy = generate_conformers(\n\u001b[0;32m--> 117\u001b[0;31m \u001b[0mmol_copy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_confs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_confs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstrictStereo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstrictStereo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 118\u001b[0m )\n\u001b[1;32m 119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/protons-dev/lib/python3.6/site-packages/protons-0.0.1a6+20.g79ce0da.dirty-py3.6.egg/protons/app/openeye.py\u001b[0m in \u001b[0;36mgenerate_conformers\u001b[0;34m(molecule, max_confs, strictStereo, ewindow, rms_threshold, strictTypes)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0momega\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmolcopy\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# generate conformation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 420\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"omega returned error code %d\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 422\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmolcopy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: omega returned error code 0" + ] + } + ], + "source": [ + "t1 = { 'log_population' : '1.0', 'net_charge' : 0 }\n", + "t2 = { 'log_population' : '1.0', 'net_charge' : 0 }\n", + "isomer_dictionary = [ t1, t2 ]\n", + "tautomer_utils.setting_up_tautomer(settings, isomer_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "simulation, driver, pdb_object = tautomer_utils.generate_simulation_and_driver(settings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tautomer_utils.run_main(simulation, driver, pdb_object, settings)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "protons-dev", + "language": "python", + "name": "protons-dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/protons/app/__init__.py b/protons/app/__init__.py index 7352da2..b66a7b5 100644 --- a/protons/app/__init__.py +++ b/protons/app/__init__.py @@ -7,7 +7,7 @@ from .topology import Topology from .calibration import SAMSCalibrationEngine from .simulation import ConstantPHSimulation -from .driver import ForceFieldProtonDrive, AmberProtonDrive, NCMCProtonDrive +from .driver import ForceFieldProtonDrive, AmberProtonDrive, NCMCProtonDrive, TautomerForceFieldProtonDrive, TautomerNCMCProtonDrive from .proposals import UniformProposal, DoubleProposal, CategoricalProposal from .integrators import GBAOABIntegrator from .modeller import Modeller diff --git a/protons/app/driver.py b/protons/app/driver.py index 0303b88..903ca53 100644 --- a/protons/app/driver.py +++ b/protons/app/driver.py @@ -33,6 +33,7 @@ from typing import Dict, List, Optional, Tuple, Any, Callable from .integrators import GHMCIntegrator, GBAOABIntegrator from enum import Enum +from collections import defaultdict kB = (1.0 * unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA).in_units_of( unit.kilojoules_per_mole / unit.kelvin @@ -3052,6 +3053,7 @@ def _attempt_state_change(self, proposal, residue_pool=None, reject_on_nan=False # Store work history attempt_data.work = work + log.debug('Calculate work for change from ' + str(initial_titration_states) + ' to ' + str(final_titration_states)) log_P_accept = -work log_P_accept += logp_ratio_residue_proposal @@ -3073,6 +3075,8 @@ def _attempt_state_change(self, proposal, residue_pool=None, reject_on_nan=False if accept_move: # Accept. + log.debug('accept ncmc titration state change ...') + if initial_titration_states != final_titration_states: self.naccepted += 1 # Update titration states. @@ -3105,6 +3109,8 @@ def _attempt_state_change(self, proposal, residue_pool=None, reject_on_nan=False else: # Reject. + log.debug('reject ncmc titration state change ...') + if initial_titration_states != final_titration_states: self.nrejected += 1 # Restore titration states. @@ -3138,7 +3144,7 @@ def _attempt_state_change(self, proposal, residue_pool=None, reject_on_nan=False except Exception as err: if str(err) == "Particle coordinate is nan" and reject_on_nan: - logging.warning("NaN during NCMC move, rejecting") + log.warning("NaN during NCMC move, rejecting") # Reject. if initial_titration_states != final_titration_states: self.nrejected += 1 @@ -3859,3 +3865,804 @@ def strip_in_unit_system(quant, unit_system=unit.md_unit_system, compatible_with return quant.value_in_unit_system(unit_system) else: return quant + + + + + +class TautomerNCMCProtonDrive(NCMCProtonDrive): + + def __init__(self, temperature, topology, system, pressure=None, perturbations_per_trial=0, propagations_per_step=1): + + super().__init__(temperature, topology, system, pressure=pressure, perturbations_per_trial=perturbations_per_trial, propagations_per_step=propagations_per_step) + # Store force object pointers. + force_classes_to_update = ['NonbondedForce', 'HarmonicBondForce', 'HarmonicAngleForce', 'PeriodicTorsionForce'] + self.forces_to_update = list() + for force_index in range(self.system.getNumForces()): + force = self.system.getForce(force_index) + if force.__class__.__name__ in force_classes_to_update: + self.forces_to_update.append(force) + + + + def _cache_force(self, titration_group_index, titration_state_index): + """ + Cache the force parameters for a single tautomer state. + + Parameters + ---------- + titration_group_index : int + Index of the group + titration_state_index : int + + Call this function to set up the 'forces' information for a single tautomer state. + Every tautomer state has a list called forces, which stores parameters for all forces that need updating. + Inside each list entry is a dictionary that always contains an entry called `atoms` 'bonds', 'angles' and ''torsions'. + Parameters for atoms, bonds and angles are matched between the different tautomer states. Torsions are + handled differently: all torsions are added and only the force constant is scaled in the different states. + NonbondedForces also have an entry called `exceptions`, containing exception parameters. + + Returns + ------- + + """ + + log.info('#########################') + log.info('Titration group index {}'.format(titration_group_index)) + log.info('titration state index {}'.format(titration_state_index)) + log.info('#########################') + + titration_group = self.titrationGroups[titration_group_index] + titration_state = self.titrationGroups[titration_group_index][titration_state_index] + + + parameters = titration_state.lookup_for_parameters + + + # genearte atom idx to atom name dictionaries + atom_name_to_openMM_indice = dict() + openMM_indices_to_atom_name = dict() + atom_indices = titration_group.atom_indices + # the ffxml indices are sequential - the openMM indices are not. Since + # the openMM indices and their order are known it is possible to + # map the ffxml indices to the openMM indices - atom_indices are generated in + # _add_xml_titration_groups() + ffxml_indices_to_openMM_indices = dict(zip(list(range(len(atom_indices))), atom_indices)) + + + for atom_name in parameters['nonbonded']: + idx = ffxml_indices_to_openMM_indices[parameters['nonbonded'][atom_name]['ffxml_index']] + openMM_indices_to_atom_name[idx] = atom_name + atom_name_to_openMM_indice[atom_name] = idx + + titration_group.atom_indices_to_atom_name = openMM_indices_to_atom_name + + # Store the parameters per individual force + f_params = list() + + + for force_index, force in enumerate(self.forces_to_update): + + # Get name of force class. + force_classname = force.__class__.__name__ + + # cache atom parameters for current state + if force_classname == 'NonbondedForce': + f_params.append(dict(atoms=defaultdict())) + + for atom_index in atom_indices: + atom_name = openMM_indices_to_atom_name[atom_index] + current_parameters = {key: value for (key, value) in parameters['nonbonded'][atom_name].items()} + current_parameters['name'] = atom_name + f_params[force_index]['atoms'][atom_index] = current_parameters + + elif force_classname == 'HarmonicBondForce': + f_params.append(dict(bonds=defaultdict())) + # only heavy atom - heavy atom bonds are regarded + for bond_index in range(force.getNumBonds()): + a1, a2, length, k = force.getBondParameters(bond_index) + # test if this bond is a ligand bond + if not all(x in atom_indices for x in [a1,a2]): + continue + + atom_name1 = openMM_indices_to_atom_name[a1] + atom_name2 = openMM_indices_to_atom_name[a2] + + # update current parameters with particular titration state + current_parameters = {key: value for (key, value) in parameters['bonded'][(atom_name1,atom_name2)].items()} + current_parameters['atom1_idx'], current_parameters['atom2_idx'] = a1, a2 + current_parameters['atom_name1'], current_parameters['atom_name2'] = atom_name1, atom_name2 + f_params[force_index]['bonds'][bond_index] = current_parameters + + elif force_classname == 'HarmonicAngleForce': + f_params.append(dict(angles=defaultdict())) + for angle_index in range(force.getNumAngles()): + a1, a2, a3, angle_value, k = force.getAngleParameters(angle_index) + # test if this angle is a ligand angle + if not all(x in atom_indices for x in [a1,a2,a3]): + continue + + atom_name1,atom_name2,atom_name3 = openMM_indices_to_atom_name[a1],openMM_indices_to_atom_name[a2],openMM_indices_to_atom_name[a3] + current_parameters = {key: value for (key, value) in parameters['angle'][(atom_name1,atom_name2,atom_name3)].items()} + current_parameters['atom1_idx'], current_parameters['atom2_idx'], current_parameters['atom3_idx'] = a1, a2, a3 + current_parameters['atom_name1'], current_parameters['atom_name2'], current_parameters['atom_name3'] = atom_name1, atom_name2, atom_name3 + # update current parameters with particular titration state + f_params[force_index]['angles'][angle_index] = current_parameters + + # set torsion parameters + elif force_classname == 'PeriodicTorsionForce': + f_params.append(dict(torsion=list(), ks=defaultdict())) + # torsions are initialized differently than the other forces + # for each state all its torsions are added with force constant zero + # and two lists generated that have + # -) the index of all torsions that are real for this state and + # -) the force constants for each torsion + for torsion in parameters['torsion']: + idx = force.addTorsion(atom_name_to_openMM_indice[parameters['torsion'][torsion]['name1']], + atom_name_to_openMM_indice[parameters['torsion'][torsion]['name2']], + atom_name_to_openMM_indice[parameters['torsion'][torsion]['name3']], + atom_name_to_openMM_indice[parameters['torsion'][torsion]['name4']], + int(parameters['torsion'][torsion]['periodicity']), + float(parameters['torsion'][torsion]['phase']), + float(0.0)) + f_params[force_index]['torsion'].append(idx) + f_params[force_index]['ks'][idx] = parameters['torsion'][torsion]['k'] + + else: + raise Exception("Don't know how to update force type '%s'" % force_classname) + + # Update exceptions + # TODO: Handle Custom forces. + if force_classname == 'NonbondedForce': + f_params[force_index]['exceptions'] = list() + for e_ix, exception_index in enumerate(titration_group.exception_indices): + [particle1, particle2, chargeProd, sigma, epsilon] = map( + strip_in_unit_system, force.getExceptionParameters(exception_index)) + + # NOTE: mw: not sure if this does what it is intendent to do + atom_name1 = openMM_indices_to_atom_name[particle1] + atom_name2 = openMM_indices_to_atom_name[particle2] + parameters['nonbonded'][atom_name1]['charge'] + + # Deal with exceptions between atoms outside of titratable residue + try: + charge_1 = parameters['nonbonded'][atom_name1]['charge'] + except KeyError: + charge_1 = strip_in_unit_system(force.getParticleParameters(particle1)[0]) + try: + charge_2 = parameters['nonbonded'][atom_name2]['charge'] + except KeyError: + charge_2 = strip_in_unit_system(force.getParticleParameters(particle2)[0]) + + chargeProd = self.coulomb14scale * charge_1 * charge_2 + + # chargeprod and sigma cannot be identically zero or else we risk the error: + # Exception: updateParametersInContext: The number of non-excluded exceptions has changed + # TODO: Once OpenMM interface permits this, omit this code. + if 2 * chargeProd == chargeProd: + chargeProd = sys.float_info.epsilon + if 2 * epsilon == epsilon: + epsilon = sys.float_info.epsilon + + # store specific local variables in dict by name + exc_dict = dict() + for i in ('exception_index', 'particle1', 'particle2', 'chargeProd', 'sigma', 'epsilon'): + exc_dict[i] = locals()[i] + f_params[force_index]['exceptions'].append(exc_dict) + + self.titrationGroups[titration_group_index][titration_state_index].forces = f_params + + def _update_forces(self, titration_group_index, final_titration_state_index, initial_titration_state_index=None, fractional_titration_state=1.0, final=False): + """ + Update the force parameters to a new tautomer state by reading them from the cache. + + Notes + ----- + * Please ensure that the context is updated after calling this function, by using + `force.updateParametersInContext(context)` for each force that has been updated. + + Parameters + ---------- + titration_group_index : int + Index of the group that is changing state + final_titration_state_index : int + Index of the state of the chosen residue + initial_titration_state_index : int, optional, default=None + If blending two titration states, the initial titration state to blend. + If `None`, set to `titration_state_index` + fractional_titration_state : float, optional, default=1.0 + Fraction of `titration_state_index` to be blended with `initial_titration_state_index`. + If 0.0, `initial_titration_state_index` is fully active; if 1.0, `titration_state_index` is fully active. + + Notes + ----- + * Every tautomer state has a list called forces, which stores parameters for all forces that need updating. + * Inside each list entry is a dictionary that contains the parameters for the forces. The atom, bond, angle parameter + dictionaries use the force index as key. The torsion parameter dict has a list with the + indices of torsions that are turned on at this tautomer state and a dictionary with the + index as key and the force constant as value. + * NonbondedForces also have an entry called `exceptions`, containing exception parameters. + + """ + # `initial_titration_state_index` should have no effect if not specified, so set it identical to + # `final_titration_state_index` in that case + if initial_titration_state_index is None: + initial_titration_state_index = final_titration_state_index + + # Retrieve cached force parameters fro this titration state. + cache_initial_forces = self.titrationGroups[titration_group_index][initial_titration_state_index].forces + cache_final_forces = self.titrationGroups[titration_group_index][final_titration_state_index].forces + + atom_name_by_atom_index = self.titrationGroups[titration_group_index].atom_indices_to_atom_name + + # Modify parameters + for force_index, force in enumerate(self.forces_to_update): + # Get name of force class. + force_classname = force.__class__.__name__ + if force_classname == 'NonbondedForce': + + # Update forces using appropriately blended parameters + for atom_idx in atom_name_by_atom_index: + atom_initial = cache_initial_forces[force_index]['atoms'][atom_idx] + atom_final = cache_final_forces[force_index]['atoms'][atom_idx] + atom = {} + + # only change parameters if needed, otherwise keep old parameters + if atom_initial['charge'] != atom_final['charge'] or atom_initial['sigma'] != atom_final['sigma'] or atom_initial['epsilon'] != atom_final['epsilon']: + + #charge is scaled seperat from sigma and epsiolon to enable shielding of charges if charge increases + for parameter_name in ['sigma', 'epsilon']: + scale = fractional_titration_state + #if charges increase epsilon and sigma have to increase faster to shield charges + if float(atom_final['charge']) > float(atom_initial['charge']): + scale = min(scale* 2.0, 1.0) + atom[parameter_name] = (1.0 - scale) * atom_initial[parameter_name] + scale * atom_final[parameter_name] + + for parameter_name in ['charge']: + # if charges are decresed they should decrease faster to avoid unshielded charges + scale = fractional_titration_state + if float(atom_final['charge']) < float(atom_initial['charge']): + scale = min(scale* 2.0, 1.0) + atom[parameter_name] = (1.0 - scale) * atom_initial[parameter_name] + scale * atom_final[parameter_name] + else: + # keep initial parameters since nothing changed + for parameter_name in ['sigma', 'epsilon', 'charge']: + atom[parameter_name] = atom_initial[parameter_name] + + force.setParticleParameters(atom_idx, atom['charge'], atom['sigma'], atom['epsilon']) + + + for (exc_initial, exc_final) in zip(cache_initial_forces[force_index]['exceptions'], cache_final_forces[force_index]['exceptions']): + + exc = {key: exc_initial[key] for key in ['exception_index', 'particle1', 'particle2']} + for parameter_name in ['chargeProd', 'sigma', 'epsilon']: + exc[parameter_name] = (1.0 - fractional_titration_state) * exc_initial[parameter_name] + \ + fractional_titration_state * exc_final[parameter_name] + force.setExceptionParameters( + exc['exception_index'], exc['particle1'], exc['particle2'], exc['chargeProd'], exc['sigma'], exc['epsilon']) + + + elif force_classname == 'HarmonicBondForce': + # ensure that there are equal number of chached bonded parameters present + if len((cache_initial_forces[force_index]['bonds'])) != len((cache_final_forces[force_index]['bonds'])): + raise ValueError('Non equal number of bonded forces. Abort.') + + for bond_idx in cache_initial_forces[force_index]['bonds']: + bond_initial = cache_initial_forces[force_index]['bonds'][bond_idx] + bond_final = cache_final_forces[force_index]['bonds'][bond_idx] + bond = {'atom1_idx' : bond_initial['atom1_idx'], 'atom2_idx' : bond_initial['atom2_idx'], 'bond_index' : bond_idx} + + # update bonds that changed parameters + if bond_initial['length'] != bond_final['length'] or bond_initial['k'] != bond_final['k']: + scale = fractional_titration_state + for parameter_name in ['length', 'k']: + # generate new, interpolated parameters + bond[parameter_name] = (1.0 - scale) * float(bond_initial[parameter_name]) + scale * float(bond_final[parameter_name]) + else: + for parameter_name in ['length', 'k']: + # use old parameters + bond[parameter_name] = bond_initial[parameter_name] + + # set new parameters using atom indices + force.setBondParameters(bond_idx, bond['atom1_idx'], bond['atom2_idx'], float(bond['length']), float(bond['k'])) + + elif force_classname == 'HarmonicAngleForce': + if len((cache_initial_forces[force_index]['angles'])) != len((cache_final_forces[force_index]['angles'])): + raise ValueError('Non equal number of angle forces. Abort.') + + for angle_idx in cache_initial_forces[force_index]['angles']: + angle_initial = cache_initial_forces[force_index]['angles'][angle_idx] + angle_final = cache_final_forces[force_index]['angles'][angle_idx] + + a1, a2, a3 = angle_initial['atom1_idx'], angle_initial['atom2_idx'], angle_initial['atom3_idx'] + angle = {} + + # update angles that changed parameters + if angle_initial['angle'] != angle_final['angle'] or angle_initial['k'] != angle_final['k']: + scale = fractional_titration_state + for parameter_name in ['angle', 'k']: + angle[parameter_name] = (1.0 - scale) * float(angle_initial[parameter_name]) + scale * float(angle_final[parameter_name]) + else: + for parameter_name in ['angle', 'k']: + # use old parameters + angle[parameter_name] = float(angle_initial[parameter_name]) + + force.setAngleParameters(angle_idx, a1, a2, a3, angle['angle'], angle['k']) + + elif force_classname == 'PeriodicTorsionForce': + + for idx in range(force.getNumTorsions()): + a1, a2, a3, a4, periodicity, phase, k = map(strip_in_unit_system, force.getTorsionParameters(idx)) + # test if this torsion is a ligand torsion + if not all(x in atom_name_by_atom_index.keys() for x in [a1,a2,a3,a4]): + continue + + new_k = None + # initial state torsions are scaled down + if idx in cache_initial_forces[force_index]['torsion']: + if fractional_titration_state <= 0.5: + scaling = (1.0 - (2* fractional_titration_state)) + else: + scaling = 0.0 + new_k = scaling * float(cache_initial_forces[force_index]['ks'][idx]) + + # final state torsions are scaled up + if idx in cache_final_forces[force_index]['torsion']: + if fractional_titration_state <= 0.5: + scaling = 0.0 + else: + scaling = (2* (fractional_titration_state - 0.5)) + new_k = scaling * float(cache_final_forces[force_index]['ks'][idx]) + + # other state torsions are kept with k = 0.0 + if idx not in cache_initial_forces[force_index]['torsion'] and idx not in cache_final_forces[force_index]['torsion']: + new_k = 0.0 + force.setTorsionParameters(idx, a1, a2, a3, a4, periodicity, phase, new_k) + else: + raise Exception("Don't know how to update force type '%s'" % force_classname) + + + +class TautomerForceFieldProtonDrive(TautomerNCMCProtonDrive): + + def __init__(self, temperature, topology, system, forcefield, ffxml_files, pressure=None, perturbations_per_trial=0, propagations_per_step=1, residues_by_name=None, residues_by_index=None): + + + """ + Initialize a Monte Carlo titration driver for simulation of tautomer states + + Parameters + ---------- + temperature : simtk.unit.Quantity compatible with kelvin + Temperature at which the system is to be simulated. + topology : protons.app.Topology + Topology of the system + system : simtk.openmm.System + System to be titrated, containing all possible protonation sites. + ffxml_files : str or list of str + Single ffxml filename, or list of ffxml filenames containing protons information. + forcefield : simtk.openmm.app.ForceField + ForceField parameters used to make a system. + pressure : simtk.unit.Quantity compatible with atmospheres, optional + For explicit solvent simulations, the pressure. + perturbations_per_trial : int, optional, default=0 + Number of steps per NCMC switching trial, or 0 if instantaneous Monte Carlo is to be used. + propagations_per_step : int, optional, default=1 + Number of propagation steps in between perturbation steps. + + #NOTE: here we indicate the residue that should be treated as titratable + + residues_by_index : list of int + Residues in topology by index that should be treated as titratable + residues_by_name : list of str + Residues by name in topology that should be treated as titratable + + Notes + ----- + If neither residues_by_index, or residues_by_name are specified, all possible residues with Protons parameters + will be treated. + + """ + # Input validation + if residues_by_name is not None: + if not isinstance(residues_by_name, list): + raise TypeError("residues_by_name needs to be a list") + + if residues_by_index is not None: + if not isinstance(residues_by_index, list): + raise TypeError("residues_by_index needs to be a list") + + super(TautomerForceFieldProtonDrive, self).__init__(temperature, topology, system, pressure, perturbations_per_trial=perturbations_per_trial, propagations_per_step=propagations_per_step) + + ffxml_residues = self._parse_ffxml_files(ffxml_files) + + # Collect all of the residues that need to be treated + all_residues = list(topology.residues()) + selected_residue_indices = list() + + # Validate user specified indices + if residues_by_index is not None: + for residue_index in residues_by_index: + residue = all_residues[residue_index] + if residue.name not in ffxml_residues: + raise ValueError("Residue '{}:{}' is not treatable using protons. Please provide Protons parameters using an ffxml file, or deselect it.".format(residue.name, residue.index)) + selected_residue_indices.extend(residues_by_index) + + # Validate user specified residue names + if residues_by_name is not None: + for residue_name in residues_by_name: + if residue_name not in ffxml_residues: + raise ValueError("Residue type '{}' is not a protons compatible residue. Please provide Protons parameters using an ffxml file, or deselect it.".format(residue_name)) + + for residue in all_residues: + if residue.name in residues_by_name: + selected_residue_indices.append(residue.index) + log.info('Selected residue indeces: {}'.format(selected_residue_indices)) + # If no names or indices are specified, make all compatible residues titratable + if residues_by_name is None and residues_by_index is None: + for residue in all_residues: + if residue.name in ffxml_residues: + selected_residue_indices.append(residue.index) + + # Remove duplicate indices and sort + selected_residue_indices = sorted(list(set(selected_residue_indices))) + self._add_xml_titration_groups(topology, forcefield, ffxml_residues, selected_residue_indices) + + return + + def _parse_ffxml_files(self, ffxml_files): + """ + Read an ffxml file, or a list of ffxml files, and extract the residues that have Protons information. + + Parameters + ---------- + ffxml_files single object, or list of + - a file name/path + - a file object + - a file-like object + - a URL using the HTTP or FTP protocol + The file should contain ffxml residues that have a block. + + Returns + ------- + ffxml_residues - dict of all residue blocks that were detected, with residue names as keys. + + """ + if not isinstance(ffxml_files, list): + ffxml_files = [ffxml_files] + + xmltrees = list() + ffxml_residues = dict() + # Collect xml parameters from provided input files + for file in ffxml_files: + try: + tree = etree.parse(file) + xmltrees.append(tree) + except IOError: + full_path = os.path.join(os.path.dirname(__file__), 'data', file) + tree = etree.parse(full_path) + xmltrees.append(tree) + + for xmltree in xmltrees: + # All residues that contain a protons block + for xml_residue in xmltree.xpath('/ForceField/Residues/Residue[Protons]'): + xml_resname = xml_residue.get("name") + if not xml_resname in ffxml_residues: + # Store the protons block of the residue + ffxml_residues[xml_resname] = xml_residue + else: + raise ValueError("Duplicate residue name found in parameters: {}".format(xml_resname)) + + return ffxml_residues + + + def _add_xml_titration_groups(self, topology, forcefield, ffxml_residues, selected_residue_indices): + """ + Create tautomer groups for the selected residues in the topology, using ffxml information gathered earlier. + Parameters + ---------- + topology - OpenMM Topology object + forcefield - OpenMM ForceField object + ffxml_residues - dict of residue ffxml templates + selected_residue_indices - Residues to treat using Protons. + + Returns + ------- + + """ + + all_residues = list(topology.residues()) + bonded_to_atoms_list = forcefield._buildBondedToAtomList(topology) + + # Extract number of tautomer groups. + ngroups = len(selected_residue_indices) + # Define tautomer states. + for group_index in range(ngroups): + # Extract information about this titration group. + residue_index = selected_residue_indices[group_index] + residue = all_residues[residue_index] + + template = forcefield._templates[residue.name] + # Find the system indices of the template atoms for this residue + matches = app.forcefield._matchResidue(residue, template, bonded_to_atoms_list) + + if matches is None: + raise ValueError("Could not match residue atoms to template.") + + atom_indices = [atom.index for atom in residue.atoms()] + # Sort the atom indices in the template in the same order as the topology indices. + atom_indices = [id for (match, id) in sorted(zip(matches, atom_indices))] + protons_block = ffxml_residues[residue.name].xpath('Protons')[0] + + residue_pka = None + pka_data = None + # Add pka adjustment features + if residue.name in available_pkas: + residue_pka = available_pkas[residue.name] + + if len(protons_block.findall('State/Condition')) > 0 and residue_pka is None: + pka_data = DataFrame(columns=["pH", 'Temperature (K)', 'Ionic strength (mM)', 'log population']) + for state_index, state_block in enumerate(protons_block.xpath('State')): + for condition in state_block.xpath('Condition'): + row = dict(State=state_index) + try: + row['pH'] = float(condition.get('pH')) + except TypeError: + row['pH'] = None + try: + row['Temperature (K)'] = float(condition.get('temperature_kelvin')) + except TypeError: + row['Temperature (K)'] = None + try: + row['Ionic strength (mM)'] = float(condition.get('ionic_strength_mM')) + except TypeError: + row['Ionic strength (mM)'] = None + logpop = condition.get('log_population') + try: + row['log population'] = float(logpop) + except TypeError: + raise ValueError("The log population provided can not be converted to a number :'{}'".format(logpop)) + pka_data = pka_data.append(row, ignore_index=True) + + # Create a new group with the given indices + self._add_titratable_group(atom_indices, + residue.name, + name="Chain {} Residue {} {}".format(residue.chain.id, residue.name, residue.id), + residue_pka=residue_pka, pka_data=pka_data) + + # Define titration states. + log.info('- Parameters for the different states as defined in ffxml') + + ################################################ + ################################################ + + for state_block in protons_block.xpath("State"): + # Extract charges for this titration state. + # is defined in elementary_charge units + state_index = int(state_block.get("index")) + + relative_energy = float(state_block.get("g_k")) * unit.kilocalories_per_mole + # Get proton count. + proton_count = int(state_block.get("proton_count")) + # Read in parameters for state from ffxl and generate lookup dicts + parameters_for_current_state = self._generating_parameter_for_current_state(state_block) + + # Create titration state. + self._add_titration_state(group_index, relative_energy, parameters_for_current_state, proton_count) + #self._look_at_torsions(group_index, state_index) + self._cache_force(group_index, state_index) + + # Set default state for this group. + self._set_titration_state(group_index, 0) + + def _add_titration_state( + self, + titration_group_index, + relative_energy, + parameters_for_current_state, + proton_count: int, + cooh_movers: Optional[List[COOHDummyMover]] = None, + ): + """ + Add a titration state to a titratable group. + + Parameters + ---------- + + titration_group_index : int + the index of the titration group to which a new titration state is to be added + relative_energy : simtk.unit.Quantity with units compatible with simtk.unit.kilojoules_per_mole + the relative energy of this protonation state + charges : list or numpy array of simtk.unit.Quantity with units compatible with simtk.unit.elementary_charge + the atomic charges for this titration state + proton_count : int + number of protons in this titration state + cooh_movers : list of COOHDummyMovers that this state can use + + Notes + ----- + + The number of charges specified must match the number (and order) of atoms in the defined titration group. + """ + + # Check input arguments. + if titration_group_index not in range(self._get_num_titratable_groups()): + raise Exception( + "Invalid titratable group requested. Requested %d, valid groups are in range(%d)." + % (titration_group_index, self._get_num_titratable_groups()) + ) + + charges = [] + for atom in parameters_for_current_state['nonbonded']: + charges.append(parameters_for_current_state['nonbonded'][atom]['charge']) + + if len(charges) != len( + self.titrationGroups[titration_group_index].atom_indices + ): + raise Exception( + "The number of charges must match the number (and order) of atoms in the defined titration group." + ) + + + state = _TautomerState.from_lists( + relative_energy * self.beta, + parameters_for_current_state, + proton_count, + cooh_movers, + ) + self.titrationGroups[titration_group_index].add_state(state) + return + + + + def _generating_parameter_for_current_state(self, state_block): + """ + Generates the parameter lookup for tautomer state changes + ---------- + state_block - lxml.etree + Returns + ------- + dictionary containing 'nonbonded', 'charge_list', 'bonded', 'angle', 'torsion' entries. + + """ + + + bonded_par = dict() + angle_par = dict() + torsion_par = dict() + nonbonded_par = dict() + charge_list = list() + # Extract charges for this titration state. + # is defined in elementary_charge units + log.info('###############') + state_index = int(state_block.get("index")) + log.info('-- Looking at parameters of State: {}'.format(state_index)) + + # build atom properties + for index, atom in enumerate(state_block.xpath("Atom")): + nonbonded_par[atom.get('name')] = { 'charge' : float(atom.get("charge")), + 'epsilon' : float(atom.get("epsilon")), + 'sigma' : float(atom.get("sigma")), + 'type' : atom.get('type'), + 'ffxml_index' : int(index)} + charge_list.append(float(atom.get("charge"))) + + # build bond properties + for index, bond in enumerate(state_block.xpath("Bond")): + key = (bond.get('name1'), bond.get('name2')) + bonded_par[key] = { 'length' : bond.get('length'), 'k': bond.get('k'), 'ffxml_index' : int(index)} + + # build angle properties + for index, angle in enumerate(state_block.xpath("Angle")): + key = (angle.get('name1'), angle.get('name2'), angle.get('name3')) + angle_par[key] = {'angle' : angle.get('angle'), 'k' : angle.get('k'), 'ffxml_index' : int(index)} + + # build torsion properties + log.info('Parsing proper parameters from ffxml file') + for index, torsion in enumerate(state_block.xpath("Torsion")): + torsion_par[index] = dict(torsion.items()) + + return {'nonbonded' : nonbonded_par, 'charge_list' : charge_list, 'bonded' : bonded_par, 'angle' : angle_par, 'torsion' : torsion_par} + + + +class _TautomerState(_TitrationState): + """Representation of a titration state""" + + def __init__(self): + """Instantiate a _TautomerState""" + + self.g_k = None # dimensionless quantity + self.charges = list() + self.proton_count = None + self._forces = list() + self._target_weight = None + self.parameters = None + # MC moves should be functions that take the positions, and return updated positions, + # and a log (reverse/forward) proposal probability ratio + self._mc_moves = dict() # Dict[str, List[COOHDummyMover]] + + + @classmethod + def from_lists(cls, g_k, parameters, proton_count,cooh_movers: Optional[List[COOHDummyMover]] = None): + """Instantiate a _TitrationState from g_k, proton count and a list of the charges + + Returns + ------- + obj - a new _TitrationState instance + """ + obj = cls() + obj.g_k = g_k # dimensionless quantity + obj.lookup_for_parameters = copy.deepcopy(parameters) + obj.proton_count = proton_count + obj.charges = copy.deepcopy(parameters['charge_list']) + + # Note that forces are to be manually added by force caching functionality in ProtonDrives + obj._forces = list() + obj._target_weight = None + if cooh_movers is not None: + for mover in cooh_movers: + if "COOH" not in obj._mc_moves: + obj._mc_moves["COOH"] = list() + obj._mc_moves["COOH"].append(mover) + + + return obj + + + +def log_progress(sequence, every=None, size=None, name='Items'): + from ipywidgets import IntProgress, HTML, VBox + from IPython.display import display + + is_iterator = False + if size is None: + try: + size = len(sequence) + except TypeError: + is_iterator = True + if size is not None: + if every is None: + if size <= 200: + every = 1 + else: + every = int(size / 200) # every 0.5% + else: + assert every is not None, 'sequence is iterator, set every' + + if is_iterator: + progress = IntProgress(min=0, max=1, value=1) + progress.bar_style = 'info' + else: + progress = IntProgress(min=0, max=size, value=0) + label = HTML() + box = VBox(children=[label, progress]) + display(box) + + index = 0 + try: + for index, record in enumerate(sequence, 1): + if index == 1 or index % every == 0: + if is_iterator: + label.value = '{name}: {index} / ?'.format( + name=name, + index=index + ) + else: + progress.value = index + label.value = u'{name}: {index} / {size}'.format( + name=name, + index=index, + size=size + ) + yield record + except: + progress.bar_style = 'danger' + raise + else: + progress.bar_style = 'success' + progress.value = index + label.value = "{name}: {index}".format( + name=name, + index=str(index or '?') + ) diff --git a/protons/app/ligands.py b/protons/app/ligands.py index 6ac70ae..0efc160 100644 --- a/protons/app/ligands.py +++ b/protons/app/ligands.py @@ -21,6 +21,7 @@ import networkx as nx import lxml from .. import app +from .driver import strip_in_unit_system from simtk.openmm import openmm from simtk.unit import * from ..app.integrators import GBAOABIntegrator @@ -28,6 +29,8 @@ from typing import Optional import parmed from typing import List, Dict, Tuple +from io import StringIO +import matplotlib.pyplot as plt PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) @@ -1548,6 +1551,8 @@ def generate_protons_ffxml( pH: float, resname: str = "LIG", omega_max_confs: int = 200, + tautomers : bool = False, + pdb_file_path : str = "input.pdb" ): """ Compile a protons ffxml file from a preprocessed mol2 file, and a dictionary of states and charges. @@ -1610,19 +1615,261 @@ def generate_protons_ffxml( [oemolecule], normalize=False, omega_max_confs=omega_max_confs ) log.debug(ffxml) + isomers[isomer_index]["ffxml"] = etree.fromstring(ffxml, parser=xmlparser) isomers[isomer_index]["pH"] = pH + if tautomers: + # set some additional parameters + _register_tautomers(isomers, isomer_index, oemolecule, pdb_file_path, residue_name=resname) + ifs.close() - compiler = _TitratableForceFieldCompiler(isomers, residue_name=resname) + if not tautomers: + compiler = _TitratableForceFieldCompiler(isomers, residue_name=resname) + else: + compiler = _TautomerForceFieldCompiler(isomers, residue_name=resname) _write_ffxml(compiler, outputffxml) log.debug("Done! Your result is located here: {}".format(outputffxml)) return outputffxml +def _register_tautomers(isomers, isomer_index, oemolecule, pdb_file_path, residue_name): + ffxml = isomers[isomer_index]['ffxml'] + + name_type_mapping = {} + name_charge_mapping = {} + atom_index_mapping = {} + for residue in ffxml.xpath('Residues/Residue'): + for idx, atom in enumerate(residue.xpath('Atom')): + name_type_mapping[atom.get('name')] = atom.get('type') + name_charge_mapping[atom.get('name')] = atom.get('charge') + atom_index_mapping[atom.get('name')] = idx + + # generate atom graph representation + G = nx.Graph() + atom_name_to_unique_atom_name = dict() + + # set atoms + for atom in oemolecule.GetAtoms(): + atom_name = atom.GetName() + atom_type = name_type_mapping[atom_name] + atom_charge = name_charge_mapping[atom_name] + atom_index = atom_index_mapping[atom_name] + atom_name_to_unique_atom_name[atom_name] = atom_name + + if int(atom.GetAtomicNum()) == 1: + #found hydrogen + heavy_atom_name = None + for atom in atom.GetAtoms(): + if int(atom.GetAtomicNum()) != 1: + heavy_atom_name = atom.GetName() + break + atom_name_to_unique_atom_name[atom_name] = atom_name + name_type_mapping[heavy_atom_name] + atom_name = atom_name + name_type_mapping[heavy_atom_name] + + G.add_node(atom_name, atom_type=atom_type, atom_charge=atom_charge, atom_index=atom_index) + + # enumerate hydrogens in this list + hydrogens = list() + # set bonds + for bond in oemolecule.GetBonds(): + a1 = bond.GetBgn() + a1_atom_name = a1.GetName() + a2 = bond.GetEnd() + a2_atom_name = a2.GetName() + if int(a1.GetAtomicNum()) == 1: + #found hydrogen + heavy_atom_name = None + for atom in a1.GetAtoms(): + if int(atom.GetAtomicNum()) != 1: + heavy_atom_name = atom.GetName() + break + hydrogens.append(tuple([a1_atom_name, a2_atom_name])) + a1_atom_name = a1_atom_name + name_type_mapping[heavy_atom_name] + if int(a2.GetAtomicNum()) == 1: + #found hydrogen + heavy_atom_name = None + for atom in a2.GetAtoms(): + if int(atom.GetAtomicNum()) != 1: + heavy_atom_name = atom.GetName() + break + hydrogens.append(tuple([a2_atom_name, a1_atom_name])) + a2_atom_name = a2_atom_name + name_type_mapping[heavy_atom_name] + G.add_edge(a1_atom_name, a2_atom_name) + + # generate hydrogen definitions for each tautomer state + # this is used to generate an openMM system of the tautomer + isomers[isomer_index]['hxml'] = generate_tautomer_hydrogen_definitions(hydrogens, residue_name, isomer_index) + isomers[isomer_index]['mol-graph'] = G + isomers[isomer_index]['atom_name_dict'] = atom_name_to_unique_atom_name + generate_tautomer_torsion_definitions(isomers, pdb_file_path, isomer_index) + +def generate_tautomer_hydrogen_definitions(hydrogens, residue_name, isomer_index): + + """ + Creates a hxml file that is used to add hydrogens for a specific tautomer to the heavy-atom skeleton + + Parameters + ---------- + hydrogens: list of tuple + Tuple contains two atom names: (hydrogen-atom-name, heavy-atom-atom-name) + residue_name : str + name of the residue to fill the Residues entry in the xml tree + isomer_index : int + + """ + + hydrogen_definitions_tree = etree.fromstring('') + hydrogen_file_residue = etree.fromstring("") + hydrogen_file_residue.set('name', residue_name) + + for name, parent in hydrogens: + h_xml = etree.fromstring("") + h_xml.set("name", name) + h_xml.set("parent", parent) + hydrogen_file_residue.append(h_xml) + hydrogen_definitions_tree.append(hydrogen_file_residue) + + return hydrogen_definitions_tree + +def generate_tautomer_torsion_definitions(isomers : dict, vacuum_file:str, isomer_index:int): + """ + Creates torsion entries (proper and improper) in the isomer dictionary. It does this + by creating an openMM system using the heavy-atom skeleton defined in the pdb file (vacuum_file) and the ffxml file + of the specific isomer. Hydrogens are added using the hxml file of the specific isomer. + + Parameters + ---------- + isomers: list of dicts + One dict is necessary for every isomer. Dict should contain 'ffxml' and 'hxml' + vacumm_file : str + location for heavy-atom skeleton pdb file + isomer_index : int + + """ + + log.info("Generate tautomer torsion definitions ...") + ffxml = StringIO(etree.tostring(isomers[isomer_index]['ffxml']).decode("utf-8")) + hxml = StringIO(etree.tostring(isomers[isomer_index]['hxml']).decode("utf-8")) + forcefield = app.ForceField('amber10.xml', 'gaff.xml', ffxml, 'tip3p.xml') + pdb = app.PDBFile(vacuum_file) + app.Modeller.loadHydrogenDefinitions(hxml) + modeller = app.Modeller(pdb.topology, pdb.positions) + modeller.addHydrogens() + index_to_name = dict() + for a in modeller.topology.atoms(): + index_to_name[a.index] = isomers[isomer_index]['atom_name_dict'][a.name] + + system = forcefield.createSystem(modeller.topology) + isomers[isomer_index]['torsion-forces'] = [] + + for force in system.getForces(): + force_classname = force.__class__.__name__ + + if force_classname == 'PeriodicTorsionForce': + for torsion_index in range(force.getNumTorsions()): + a1, a2, a3, a4, periodicity, phase, k = force.getTorsionParameters(torsion_index) + par = { 'index': torsion_index, + 'atom1-name' : index_to_name[a1], + 'atom2-name' : index_to_name[a2], + 'atom3-name' : index_to_name[a3], + 'atom4-name' : index_to_name[a4], + 'periodicity' : int(strip_in_unit_system(periodicity)), + 'phase' : strip_in_unit_system(phase), + 'k' : strip_in_unit_system(k)} + isomers[isomer_index]['torsion-forces'].append(par) + + #_log_forces(system.getForces(), index_to_name) + + +def _log_forces(forces, atom_index_to_atom_name): + + """ + Helper function that outputs all forces defined in a system and displays + atom names using a mapping dictionary. + + Parameters + ---------- + forces: openMM.Force + list of forces present in a system + atom_name_by_atom_index : str + location for heavy-atom skeleton pdb file + + """ + + + for force in forces: + # Get name of force class. + force_classname = force.__class__.__name__ + log.debug('############################') + log.debug('{}'.format(force_classname)) + log.debug('############################') + + if force_classname == 'NonbondedForce': + log.debug('#######################') + log.debug('NonbondedForce') + log.debug('#######################') + for atom_idx in sorted(atom_index_to_atom_name): + charge, sigma, eps = map(strip_in_unit_system, force.getParticleParameters(atom_idx)) + log.debug('Idx:{} Name: {} Charge:{} Sigma:{} Eps:{}'.format(atom_idx, atom_index_to_atom_name[atom_idx], charge, sigma, eps)) + + elif force_classname == 'HarmonicBondForce': + log.debug('#######################') + log.debug('HarmonicBondForce') + log.debug('#######################') + for bond_idx in range(force.getNumBonds()): + a1, a2, length, k = map(strip_in_unit_system, force.getBondParameters(bond_idx)) + if a1 not in atom_index_to_atom_name or a2 not in atom_index_to_atom_name: + continue + log.debug('Idx:{} Atom1:{} Atom2:{} l:{} k:{}'.format(bond_idx, atom_index_to_atom_name[a1], atom_index_to_atom_name[a2], length, k)) + + elif force_classname == 'HarmonicAngleForce': + for angle_idx in range(force.getNumAngles()): + a1, a2, a3, angle, k = map(strip_in_unit_system, force.getAngleParameters(angle_idx)) + if a1 not in atom_index_to_atom_name or a2 not in atom_index_to_atom_name or a3 not in atom_index_to_atom_name: + continue + log.debug('Idx:{} Atom1:{} Atom2:{} Atom3:{} Angle:{} k:{}'.format(angle_idx, atom_index_to_atom_name[a1], atom_index_to_atom_name[a2], atom_index_to_atom_name[a3], angle, k)) + + elif force_classname == 'PeriodicTorsionForce': + for torsion_idx in range(force.getNumTorsions()): + a1, a2, a3, a4, periodicity, phase, k = map(strip_in_unit_system, force.getTorsionParameters(torsion_idx)) + if a1 not in atom_index_to_atom_name or a2 not in atom_index_to_atom_name or a3 not in atom_index_to_atom_name or a4 not in atom_index_to_atom_name: + continue + log.debug('Idx:{} Atom1:{} Atom2:{} Atom3:{} Atom4:{} Per:{} Phase:{} k:{}'.format(torsion_idx, atom_index_to_atom_name[a1], atom_index_to_atom_name[a2], atom_index_to_atom_name[a3], atom_index_to_atom_name[a4], periodicity, phase, k)) + +def _find_hydrogen_types_for_tautomers(gafftree: lxml.etree.ElementTree, xmlfftree: lxml.etree.ElementTree) -> set: + """ + Find all atom types that describe hydrogen atoms. + + Parameters + ---------- + gafftree - A GAFF input xml file that contains atom type definitions. + xmlfftree - the customized force field template generated that contains the dummy hydrogen definitions + + Returns + ------- + set - names of all atom types that correspond to hydrogen + """ + + # Detect all hydrogen types by element and store them in a set + hydrogen_types = set() + for atomtype in gafftree.xpath('AtomTypes/Type'): + if atomtype.get('element') == "H": + hydrogen_types.add(atomtype.get('name')) + + for atomtype in xmlfftree.xpath('AtomTypes/Type'): + # adds dummy atome types + if atomtype.get('name').startswith("d"): + hydrogen_types.add(atomtype.get('name')) + + + return hydrogen_types + + + def create_hydrogen_definitions( - inputfile: str, outputfile: str, gaff: str = gaff_default + inputfile: str, outputfile: str, gaff: str = gaff_default, tautomers: bool = False ): """ Generates hydrogen definitions for a small molecule residue template. @@ -1643,8 +1890,10 @@ def create_hydrogen_definitions( ) # Output tree hydrogen_definitions_tree = etree.fromstring("") - hydrogen_types = _find_hydrogen_types(gafftree) - + if tautomers is not True: + hydrogen_types = _find_hydrogen_types(gafftree) + else: + hydrogen_types = _find_hydrogen_types_for_tautomers(gafftree, xmltree) for residue in xmltree.xpath("Residues/Residue"): hydrogen_file_residue = etree.fromstring("") hydrogen_file_residue.set("name", residue.get("name")) @@ -1757,15 +2006,18 @@ def prepare_calibration_systems( app.Modeller.loadHydrogenDefinitions(hxml) if ffxml is not None: forcefield = app.ForceField( - "amber10-constph.xml", "gaff.xml", ffxml, "tip3p.xml", "ions_tip3p.xml" + "amber10.xml", "gaff.xml", ffxml, "tip3p.xml", "ions_tip3p.xml" ) + #forcefield = app.ForceField( + # "amber10-constph.xml", "gaff.xml", ffxml, "tip3p.xml", "ions_tip3p.xml" + #) else: forcefield = app.ForceField( "amber10-constph.xml", "gaff.xml", "tip3p.xml", "ions_tip3p.xml" ) _, vacuum_extension = os.path.splitext(vacuum_file) - if vacuum_extension == ".pdf": + if vacuum_extension == ".pdb": pdb = app.PDBFile(vacuum_file) elif vacuum_extension == ".cif": pdb = app.PDBxFile(vacuum_file) @@ -1777,11 +2029,11 @@ def prepare_calibration_systems( # The system will likely have different hydrogen names. # In this case its easiest to just delete and re-add with the right names based on hydrogen files - if delete_old_H: - to_delete = [ - atom for atom in modeller.topology.atoms() if atom.element.symbol in ["H"] - ] - modeller.delete(to_delete) + #if delete_old_H: + # to_delete = [ + # atom for atom in modeller.topology.atoms() if atom.element.symbol in ["H"] + # ] + # modeller.delete(to_delete) modeller.addHydrogens(forcefield=forcefield) @@ -1821,3 +2073,611 @@ def prepare_calibration_systems( app.PDBxFile.writeFile( modeller.topology, positions, open(f"{output_basename}-water.cif", "w") ) + + + +class _TautomerForceFieldCompiler(_TitratableForceFieldCompiler): + + def _make_output_tree(self, chimera=True): + """ + Store all contents of a compiled ffxml file of all isomers, and add dummies for all missing hydrogens. + """ + + # Obtain information about all the atoms + self._complete_atom_registry() + # Obtain information about all the bonds + self._complete_bond_registry() + # Register the states + self._complete_state_registry() + + # Set the initial state of the template that is read by OpenMM + self._initialize_forcefield_template() + # Add isomer specific information + self._add_isomers() + # Append extra parameters from frcmod + self._append_extra_gaff_types() + # Append dummy parameters + self._append_dummy_parameters() + + # Remove empty blocks, and unnecessary information in the ffxml tree + self._sanitize_ffxml() + + def _complete_state_registry(self): + """ + Store all the properties that are specific to each state + """ + mol_graphs = {} + for index, state in enumerate(self._input_state_data): + mol_graphs[index] = state['mol-graph'] + + self.register_tautomers(mol_graphs) + + charges = list() + for index, state in enumerate(self._input_state_data): + net_charge = state['net_charge'] + charges.append(int(net_charge)) + template = _State(index, + state['log_population'], + 0.0, # set g_k defaults to 0 for now + net_charge, + self._atom_names, + state['pH'] + ) + + self._state_templates.append(template) + + min_charge = min(charges) + for state in self._state_templates: + state.set_number_of_protons(min_charge) + return + + def _append_dummy_parameters(self): + def _unique(element): + + if 'type3' in element.attrib: + a1, a2, a3 = element.attrib['type1'], element.attrib['type2'], element.attrib['type3'] + if (a1, a2, a3) in unique_set: + return False + else: + unique_set.add((a1,a2,a3)) + unique_set.add((a3,a2,a1)) + return True + else: + a1, a2 = element.attrib['type1'], element.attrib['type2'] + if (a1, a2) in unique_set: + return False + else: + unique_set.add((a1,a2)) + unique_set.add((a2,a1)) + return True + + unique_set = set() + log.debug('################################') + log.debug('Appending dummy parameters') + log.debug('################################') + + # add dummy atom types present in the isomers + for element_string in set(self.dummy_atom_type_strings): + self._add_to_output(etree.fromstring(element_string), "/ForceField/AtomTypes") + log.debug('Adding dummy atom element: \n{}'.format(element_string)) + + for element_string in set(self.dummy_atom_nb_strings): + self._add_to_output(etree.fromstring(element_string), "/ForceField/NonbondedForce") + log.debug('Adding dummy atom nonbonded parameters: \n{}'.format(element_string)) + + for element_string in set(self.dummy_bond_strings): + e = etree.fromstring(element_string) + if _unique(e): + self._add_to_output(e, "/ForceField/HarmonicBondForce") + log.debug(element_string) + + for element_string in set(self.dummy_angle_strings): + e = etree.fromstring(element_string) + if _unique(e): + self._add_to_output(etree.fromstring(element_string), "/ForceField/HarmonicAngleForce") + log.debug(element_string) + + + def _initialize_forcefield_template(self): + """ + Set up the residue template using the superset graph. + The residue includes all atom names of all the states, only the atoms of + the first state have read atom types. + """ + + residue = self.ffxml.xpath('/ForceField/Residues/Residue')[0] + atom_string = '' + bond_string = '' + + for node in self.superset_graph: + if node in self.atom_types_dict[0]: + atom_charge = self.atom_charge_dict[0][node] + atom_type = self.atom_types_dict[0][node] + residue.append(etree.fromstring(atom_string.format(name=node, atom_type=atom_type, charge=atom_charge))) + else: + atom_type='d' + str(node) + residue.append(etree.fromstring(atom_string.format(name=node, atom_type=atom_type, charge=0.0))) + + for bond in self.superset_graph.edges: + atom1_name = bond[0] + atom2_name = bond[1] + residue.append(etree.fromstring(bond_string.format(atomName1=atom1_name, atomName2=atom2_name))) + + + def _add_isomers(self): + """ + Add all the isomer specific data to the xml template. + """ + log.debug('Add tautomer information ...') + + protonsdata = etree.fromstring("") + protonsdata.attrib['number_of_states'] = str(len(self._state_templates)) + + atom_string = '' + dummy_atom_string = '' + dummy_nb_string = '' + + bond_string = '' + dummy_bond_string = '' + + angle_string = '' + dummy_angle_string = '' + + torsion_string = '' + + # pregenerating all angles and torsions present + # generate all angles present in the superset + list_of_angles = [] + for node1 in self.superset_graph: + for node2 in self.superset_graph: + if node2.startswith('H') or not self.superset_graph.has_edge(node1, node2): + continue + else: + for node3 in self.superset_graph: + if not self.superset_graph.has_edge(node2, node3) or node3 == node1: + continue + else: + list_of_angles.append([node1, node2, node3]) + + + # here the entries for all atoms, bonds, angles and torsions are generated + # and added to the xml file using ATOM NAMES (not atom type) for each residue + # also dummy types are generated + self.dummy_atom_type_strings = list() + self.dummy_atom_nb_strings = list() + self.dummy_bond_strings = list() + self.dummy_angle_strings = list() + + atom_name_to_atom_type_inclusive_dummy_types = dict() + + for residue in self.ffxml.xpath('/ForceField/Residues/Residue'): + for isomer_index, isomer in enumerate(self._state_templates): + ############################################## + # atom entries + ############################################## + log.debug('ISOMER: {}'.format(isomer_index)) + isomer_str = str(isomer) + log.debug(isomer_str) + isomer_xml = etree.fromstring(isomer_str) + + # atom_types_dict for specific isomer + isomer_atom_name_to_atom_type = self.atom_types_dict[isomer_index] + isomer_atom_name_to_atom_charge = self.atom_charge_dict[isomer_index] + tmp_atom_name_to_atom_type_inclusive_dummy_types = dict() + # iterate over all nodes in superset graph + for node in self.superset_graph: + parm = None + e = None + # if node is in isomer_atom_types dict it has an assigned atom type in this isomer + if node in isomer_atom_name_to_atom_type: + atom_type = isomer_atom_name_to_atom_type[node] + parm = self._retrieve_parameters(atom_type1=atom_type) + e = atom_string.format(node1=node, atom_type=atom_type, charge=isomer_atom_name_to_atom_charge[node],epsilon=parm['nonbonds'].attrib['epsilon'],sigma=parm['nonbonds'].attrib['sigma']) + else: + atom_type = 'd' + node + e = atom_string.format(node1=node, atom_type=atom_type, charge=0.0,epsilon=0,sigma=0) + + if int(isomer_index) == 0: + # add dummy nb and atom parameters for first isomer + self.dummy_atom_type_strings.append(dummy_atom_string.format(atom_type=atom_type, element='H', mass=1.008)) + self.dummy_atom_nb_strings.append(dummy_nb_string.format(atom_type=atom_type, charge=0.0,epsilon=0,sigma=0)) + + tmp_atom_name_to_atom_type_inclusive_dummy_types[node] = atom_type + isomer_xml.append(etree.fromstring(e)) + log.debug(e) + + atom_name_to_atom_type_inclusive_dummy_types[isomer_index] = tmp_atom_name_to_atom_type_inclusive_dummy_types + ############################################## + # bond entries + ############################################## + isomer_bond_atom_names = self.complete_list_of_bonds[isomer_index]['bonds_atom_names'] + dummy_bond_type_list = [] + for edge in self.superset_graph.edges: + node1 = edge[0] + node2 = edge[1] + # there is never a bond that has not + # in some isomer only real atoms - therefore we can search through the atom_types_dict + atom_types = [] + # if node is not in iserom_atom_types_dict it does not have an assigned atom type in this isomer + if [node1, node2] in isomer_bond_atom_names or [node2, node1] in isomer_bond_atom_names: + atom_types.extend([isomer_atom_name_to_atom_type[node1], isomer_atom_name_to_atom_type[node2]]) + parm = self._retrieve_parameters(atom_type1=atom_types[0], atom_type2=atom_types[1]) + bond_length = parm['bonds'].attrib['length'] + k = parm['bonds'].attrib['k'] + found_parameter = True + else: + # search through all atom_types_dict to find real atom type + found_parameter = False + + for tmp_isomer_index in range(len(self._state_templates)): + if [node1, node2] in self.complete_list_of_bonds[tmp_isomer_index]['bonds_atom_names'] or [node2, node1] in self.complete_list_of_bonds[tmp_isomer_index]['bonds_atom_names']: + atom_types.extend([self.atom_types_dict[tmp_isomer_index][node1], self.atom_types_dict[tmp_isomer_index][node2]]) + found_parameter = True + # add dummy bond parameters for all superset bonds for starting residue + parm = self._retrieve_parameters(atom_type1=atom_types[0], atom_type2=atom_types[1]) + bond_length = parm['bonds'].attrib['length'] + k = parm['bonds'].attrib['k'] + if int(isomer_index) == 0: + # add dummy bond entries for first isomer + d = dummy_bond_string.format(atomType1=atom_name_to_atom_type_inclusive_dummy_types[0][node1], atomType2=atom_name_to_atom_type_inclusive_dummy_types[0][node2], bond_length=bond_length, k=k) + self.dummy_bond_strings.append(d) + d = None + break + + if found_parameter is False: + print('CAREFUL! parameter not found') + raise NotImplementedError('This case is not covered so far.') + + e = bond_string.format(node1=node1, node2=node2, bond_length=bond_length, k=k) + log.debug(e) + isomer_xml.append(etree.fromstring(e)) + + ############################################## + # angle entries + ############################################## + isomer_angle_atom_names = self.complete_list_of_angles[isomer_index]['angles_atom_names'] + + for nodes in list_of_angles: + node1, node2, node3 = nodes + found_parameters = False + parm = None + # angle between three atoms that are real in this isomer + if [node1, node2, node3] in isomer_angle_atom_names or [node3, node2, node1] in isomer_angle_atom_names: + atom_type1, atom_type2, atom_type3 = isomer_atom_name_to_atom_type[node1], isomer_atom_name_to_atom_type[node2],isomer_atom_name_to_atom_type[node3] + parm = self._retrieve_parameters(atom_type1=atom_type1,atom_type2=atom_type2,atom_type3=atom_type3) + angle = parm['angle'].attrib['angle'] + k = parm['angle'].attrib['k'] + found_parameters = True + else: + # not real in this isomer - look at other isomers and get parameters from isomer + # in which these 3 atoms form real angle + for tmp_isomer_index in range(len(self._state_templates)): + if [node1, node2, node3] in self.complete_list_of_angles[tmp_isomer_index]['angles_atom_names'] or [node3, node2, node1] in self.complete_list_of_angles[tmp_isomer_index]['angles_atom_names']: + parm = self._retrieve_parameters(atom_type1=self.atom_types_dict[tmp_isomer_index][node1],atom_type2=self.atom_types_dict[tmp_isomer_index][node2],atom_type3=self.atom_types_dict[tmp_isomer_index][node3]) + angle = parm['angle'].attrib['angle'] + k = parm['angle'].attrib['k'] + found_parameters = True + if int(isomer_index) == 0: + # add dummy angle parameters for first isomer + d = dummy_angle_string.format(atomType1=atom_name_to_atom_type_inclusive_dummy_types[0][node1], atomType2=atom_name_to_atom_type_inclusive_dummy_types[0][node2], atomType3=atom_name_to_atom_type_inclusive_dummy_types[0][node3], angle=angle, k=k) + self.dummy_angle_strings.append(d) + d = None + break + + # this is not a real angle - there are never only real atoms involved in this angle + if not found_parameters: + angle = 0.0 + k = 0.0 + e = angle_string.format(node1=node1, node2=node2, node3=node3, angle=angle, k=k) + if int(isomer_index) == 0: + # add dummy angle parameters for first isomer + d = dummy_angle_string.format(atomType1=atom_name_to_atom_type_inclusive_dummy_types[0][node1], atomType2=atom_name_to_atom_type_inclusive_dummy_types[0][node2], atomType3=atom_name_to_atom_type_inclusive_dummy_types[0][node3], angle=angle, k=k) + self.dummy_angle_strings.append(d) + d = None + + e = angle_string.format(node1=node1, node2=node2, node3=node3, angle=angle, k=k) + log.debug(e) + + isomer_xml.append(etree.fromstring(e)) + + ############################################## + # torsion entries + ############################################## + for torsion_force in self._input_state_data[isomer_index]['torsion-forces']: + e = torsion_string.format(node1=torsion_force['atom1-name'], node2=torsion_force['atom2-name'], node3=torsion_force['atom3-name'], node4=torsion_force['atom4-name'], periodicity=torsion_force['periodicity'], phase=torsion_force['phase'], k=torsion_force['k']) + isomer_xml.append(etree.fromstring(e)) + log.debug(e) + + protonsdata.append(isomer_xml) + residue.append(protonsdata) + + def register_tautomers(self, mol_graphs): + + # generate union mol graph + # and register the atom_types, bonds, angles and torsions for the different mol_graphs (tautomers) + superset_graph = nx.Graph() + + ##################### + #atoms + ##################### + atom_types_dict = dict() + atom_charge_dict = dict() + for state_idx in mol_graphs: + atom_types_dict_tmp = dict() + atom_charges_dict_tmp = dict() + for node, data in mol_graphs[state_idx].nodes(data=True): + superset_graph.add_node(node) + atom_types_dict_tmp[node] = data['atom_type'] + atom_charges_dict_tmp[node] = data['atom_charge'] + + atom_types_dict[state_idx] = atom_types_dict_tmp + atom_charge_dict[state_idx] = atom_charges_dict_tmp + + ##################### + #bonds + ##################### + complete_list_of_bonds = dict() + for state_idx in mol_graphs: + list_of_bonds_atom_types = [] + list_of_bonds_atom_names = [] + + for edge in mol_graphs[state_idx].edges(data=True): + a1_name = (list(edge)[0]) + a2_name = (list(edge)[1]) + superset_graph.add_edge(a1_name, a2_name) + list_of_bonds_atom_types.append([atom_types_dict[state_idx][a1_name], atom_types_dict[state_idx][a2_name]]) + list_of_bonds_atom_names.append([a1_name, a2_name]) + + complete_list_of_bonds[state_idx] = {'bonds_atom_types' : list_of_bonds_atom_types, 'bonds_atom_names' : list_of_bonds_atom_names} + + nx.draw(superset_graph, pos=nx.kamada_kawai_layout(superset_graph), with_labels=True, font_weight='bold', node_size=1400, alpha=0.5, font_size=12) + plt.show() + + ##################### + #angles + ##################### + complete_list_of_angles = dict() + for state_idx in mol_graphs: + list_of_angles_atom_types = [] + list_of_angles_atom_names = [] + for node1 in mol_graphs[state_idx]: + for node2 in mol_graphs[state_idx]: + if node2.startswith('H') or not mol_graphs[state_idx].has_edge(node1, node2): + continue + else: + for node3 in mol_graphs[state_idx]: + if not mol_graphs[state_idx].has_edge(node2, node3) or node3 == node1: + continue + else: + atom_types = [] + for node in [node1, node2, node3]: + if node in atom_types_dict[state_idx]: + atom_types.append(atom_types_dict[state_idx][node]) + else: + atom_types.append('d' + node) + list_of_angles_atom_types.append(atom_types) + list_of_angles_atom_names.append([node1, node2, node3]) + + complete_list_of_angles[state_idx] = {'angles_atom_types' : list_of_angles_atom_types, 'angles_atom_names' : list_of_angles_atom_names} + + + self.superset_graph = superset_graph + self.atom_types_dict = atom_types_dict + self.atom_charge_dict = atom_charge_dict + self.complete_list_of_bonds = complete_list_of_bonds + self.complete_list_of_angles = complete_list_of_angles + + def _retrieve_parameters(self, **kwargs): + """ Look through FFXML files and find all parameters pertaining to the supplied atom type. + Looks either for atom, bond, angle or torsion parameters depending on the number of arguments provided. + Returns + ------- + input : atom_type1:str, atom_type2[opt]:str, atom_type3[opt]:str, atom_type4[opt]:str, + """ + + + # Storing all the detected parameters here + params = {} + # Loop through different sources of parameters + + if len(kwargs) == 1: + # Loop through different sources of parameters + for xmltree in self._xml_parameter_trees: + # Match the type of the atom in the AtomTypes block + for atomtype in xmltree.xpath("/ForceField/AtomTypes/Type"): + if atomtype.attrib['name'] == kwargs['atom_type1']: + params['type'] = atomtype + for nonbond in xmltree.xpath("/ForceField/NonbondedForce/Atom"): + if nonbond.attrib['type'] == kwargs['atom_type1']: + params['nonbonds'] = nonbond + + return params + + elif len(kwargs) == 2: + for xmltree in self._xml_parameter_trees: + # Match the bonds of the atom in the HarmonicBondForce block + for bond in xmltree.xpath("/ForceField/HarmonicBondForce/Bond"): + if sorted([kwargs['atom_type1'], kwargs['atom_type2']]) == sorted([bond.attrib['type1'], bond.attrib['type2']]): + params['bonds'] = bond + return params + + + elif len(kwargs) == 3: + search_list = [kwargs['atom_type1'], kwargs['atom_type2'], kwargs['atom_type3']] + for xmltree in self._xml_parameter_trees: + # Match the angles of the atom in the HarmonicAngleForce block + for angle in xmltree.xpath("/ForceField/HarmonicAngleForce/Angle"): + angle_atom_types_list = [angle.attrib['type1'], angle.attrib['type2'], angle.attrib['type3']] + if search_list == angle_atom_types_list or search_list == angle_atom_types_list[::-1]: + params['angle'] = angle + return params + else: + continue + return params + + + +def prepare_mol2_for_parametrization( + input_mol2: str, + output_mol2: str, + patch_bonds: bool = True, + keep_intermediate: bool = False, + ): + """ + Map the hydrogen atoms between multiple tautomer states and return a mol2 file that + should be ready to parametrize. + + Parameters + ---------- + input_mol2: location of the multistate mol2 file. + + Notes + ----- + This renames the hydrogen atoms in your molecule so that + no ambiguity can exist between protonation states. + """ + if not output_mol2[-5:] == ".mol2": + output_mol2 += ".mol2" + + unique_filename = str(uuid.uuid4()) + tmpfiles: List[str] = [] + + if patch_bonds: + tmpsd = "{}.sdf".format(unique_filename) + tmpfiles.append(tmpsd) + ifs = oechem.oemolistream() + ofs = oechem.oemolostream() + if ifs.open(input_mol2): + ofs.open(tmpsd) + for mol in ifs.GetOEGraphMols(): + oechem.OEWriteMolecule(ofs, mol) + else: + oechem.OEThrow.Fatal("Unable to read mol.") + + bondfixmol2 = "{}-bondfix.mol2".format(unique_filename) + sd_bonds = get_sd_bonds(tmpsd) + fixed_mol2_contents = replace_mol2_bonds(input_mol2, sd_bonds) + + with open(bondfixmol2, "w") as bondfixedfile: + bondfixedfile.write(fixed_mol2_contents) + + tmpfiles.append(bondfixmol2) + + # File to be used for unifying atom names between states. + intermediate_mol2 = bondfixmol2 + else: + # File to be used for unifying atom names between states. + intermediate_mol2 = input_mol2 + + + ifs = oechem.oemolistream() + ifs.open(intermediate_mol2) + + # make oemols for mapping + graphmols = [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()] + ifs.close() + + # Make graph for keeping track of which atoms are the same + graph = nx.Graph() + + # Some hydrogens within one molecule may be chemically identical, and would otherwise be indistinguishable + # And some hydrogens accidentally get the same name + # Therefore, give every hydrogen a unique identifier. + # One labelling the molecule, the other labeling the position in the molecule. + for imol, mol in enumerate(graphmols): + h_count = 0 + for atom in mol.GetAtoms(): + if atom.GetAtomicNum() == 1: + h_count += 1 + # H for hydrogen, M for mol + atom.SetName("H{}-M{}".format(h_count,imol+1)) + # Add hydrogen atom to the graph + graph.add_node(atom, mol=imol) + + # Connect atoms that are the same + # No need to avoid self maps for now. Code is fast enough + for i1, mol1 in enumerate(graphmols): + for i2, mol2 in enumerate(graphmols): + + mol1_atoms = [atom for atom in mol1.GetAtoms()] + mol2_atoms = [atom for atom in mol2.GetAtoms()] + + # operate on a copy to avoid modifying molecule + pattern = oechem.OEGraphMol(mol1) + target = oechem.OEGraphMol(mol2) + + # Element should be enough to map + atomexpr = oechem.OEExprOpts_AtomicNumber + # Ignore aromaticity et cetera + bondexpr = oechem.OEExprOpts_EqSingleDouble + + # create maximum common substructure object + mcss = oechem.OEMCSSearch(pattern, atomexpr, bondexpr, oechem.OEMCSType_Approximate) + # set scoring function + mcss.SetMCSFunc(oechem.OEMCSMaxAtoms()) + mcss.SetMinAtoms(oechem.OECount(pattern, oechem.OEIsHeavy())) + mcss.SetMaxMatches(10) + + # Constrain all heavy atoms, so the search goes faster. + # These should not be different anyways + for at1 in pattern.GetAtoms(): + # skip H + if at1.GetAtomicNum() < 2: + continue + for at2 in target.GetAtoms(): + # skip H + if at2.GetAtomicNum() < 2: + continue + if at1.GetName() == at2.GetName(): + pat_idx = mcss.GetPattern().GetAtom(oechem.HasAtomIdx(at1.GetIdx())) + tar_idx = target.GetAtom(oechem.HasAtomIdx(at2.GetIdx())) + if not mcss.AddConstraint(oechem.OEMatchPairAtom(pat_idx, tar_idx)): + raise ValueError("Could not constrain {} {}.".format(at1.GetName(), at2.GetName())) + + unique = True + matches = mcss.Match(target, unique) + # We should only use the top one match. + for count, match in enumerate(matches): + for ma in match.GetAtoms(): + idx1 = ma.pattern.GetIdx() + idx2 = ma.target.GetIdx() + # Add edges between all hydrogens + if mol1_atoms[idx1].GetAtomicNum() == 1: + if mol2_atoms[idx2].GetAtomicNum() == 1: + graph.add_edge(mol1_atoms[idx1], mol2_atoms[idx2]) + # Sanity check, we should never see two elements mixed + else: + raise RuntimeError("Two atoms of different elements were matched.") + # stop after one match + break + + # Assign unique but matching ID's per atom/state + + # The current H counter + h_count = 0 + + for cc in nx.connected_components(graph): + # All of these atoms are chemically identical, but there could be more than one per molecule. + atomgraph = graph.subgraph(cc) + # Keep track of the unique H count + h_count += 1 + names = [at.GetName() for at in atomgraph.nodes] + # last part says which molecule the atom belongs to + mol_identifiers = [int(name.split('-M')[1]) for name in names ] + # Number + counters = {i+1: 0 for i,mol in enumerate(graphmols)} + for atom, mol_id in zip(atomgraph.nodes, mol_identifiers): + h_num = h_count + counters[mol_id] + atom.SetName("H{}".format(h_num)) + counters[mol_id] += 1 + + # If more than one hydrogen per mol found, add it to the count. + extra_h_count = max(counters.values()) - 1 + if extra_h_count < 0: + raise ValueError("Found 0 hydrogens in graph, is there a bug?") + h_count += extra_h_count + + _mols_to_file(graphmols, output_mol2) + if not keep_intermediate: + for fname in tmpfiles: + os.remove(fname)