diff --git a/ugropy/groupscsv/unifac/unifac_info.csv b/ugropy/groupscsv/unifac/unifac_info.csv index feb1eec..792328b 100644 --- a/ugropy/groupscsv/unifac/unifac_info.csv +++ b/ugropy/groupscsv/unifac/unifac_info.csv @@ -111,4 +111,4 @@ NCO|109|51|1.0567|0.732 (CH2)2SU|118|55|2.6869|2.12 CH2CHSU|119|55|2.4595|1.808 IMIDAZOL|178|84|2.026|0.868 -BTI|179|85|5.774|4.932 \ No newline at end of file +BTI|179|85|5.774|4.932 diff --git a/ugropy/groupscsv/unifac/unifac_subgroups.csv b/ugropy/groupscsv/unifac/unifac_subgroups.csv index 84d8519..57f09f6 100644 --- a/ugropy/groupscsv/unifac/unifac_subgroups.csv +++ b/ugropy/groupscsv/unifac/unifac_subgroups.csv @@ -1,114 +1,114 @@ -group|detection_smarts|smarts|contribute|composed|molecular_weight -CH3|[CX4H3]||"{""CH3"": 1}"|n|15.035 -CH2|[CX4H2]||"{""CH2"": 1}"|n|14.027 -CH|[CX4H]||"{""CH"": 1}"|n|13.019 -C|[CX4H0]||"{""C"": 1}"|n|12.011 -CH2=CH|[CH2]=[CH]||"{""CH2=CH"": 1}"|n|27.046 -CH=CH|[CH]=[CH]||"{""CH=CH"": 1}"|n|26.038 -CH2=C|[CH2]=[CH0]||"{""CH2=C"": 1}"|n|26.038 -CH=C|[CH]=[CH0]||"{""CH=C"": 1}"|n|25.03 -ACH|[cH]||"{""ACH"": 1}"|n|13.019 -AC|[cH0]||"{""AC"": 1}"|n|12.011 -ACCH3|[c][CX4H3]||"{""ACCH3"": 1, ""CH3"": -1, ""AC"": -1}"|y|27.046 -ACCH2|[c][CX4H2]||"{""ACCH2"": 1, ""CH2"": -1, ""AC"": -1}"|y|26.038 -ACCH|[c][CX4H]||"{""ACCH"": 1, ""CH"": -1, ""AC"": -1}"|y|25.03 -OH|[OH]||"{""OH"": 1}"|n|17.007 -CH3OH|[CH3][OH]||"{""CH3OH"": 1, ""CH3"": -1, ""OH"": -1}"|n|32.042 -H2O|[OH2]||"{""H2O"": 1}"|n|18.015 -ACOH|[cH0][OH]||"{""ACOH"": 1, ""OH"": -1, ""AC"": -1}"|y|29.018 -CH3CO|[CH3]C(=O)([#6,Si])|[CH3]C(=O)|"{""CH3CO"": 1, ""CH3"" : -1}"|n|43.045 -CH2CO|[CH2]C(=O)[$([#6,Si]);!$([CH3])]|[CH2]C(=O)|"{""CH2CO"": 1, ""CH2"": -1}"|n|42.037 -HCO|[CH](=O)([#6,Si])|[CH](=O)|"{""HCO"": 1}"|n|29.018 -CH3COO|[CH3][C](=O)[OH0]||"{""CH3COO"": 1, ""CH3"": -1, ""COO"": -1}"|y|59.044 -CH2COO|[CH2][C](=O)[OH0]||"{""CH2COO"": 1, ""CH2"": -1, ""COO"": -1}"|y|58.036 -HCOO|[CH](=O)[OH0]||"{""HCOO"": 1, ""COO"": -1}"|n|45.017 -CH3O|[CH3]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O])]|[CH3][OH0]|"{""CH3O"": 1, ""CH3"": -1}"|n|31.034 -CH2O|[CH2]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3])]|[CH2][OH0]|"{""CH2O"": 1, ""CH2"": -1}"|n|30.026 -CHO|[CH]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3]);!$([CH2])]|[CH][OH0]|"{""CHO"": 1, ""CH"": -1}"|n|29.018 -THF|[C]1[CH2]O[CH2][C]1|[CH2;R][O;R]|"{""THF"": 1, ""CH2O"": -1}"|n|30.026 -CH3NH2|[CH3][NH2]||"{""CH3NH2"": 1, ""CH3"": -1}"|n|31.058 -CH2NH2|[CH2][NH2]||"{""CH2NH2"": 1, ""CH2"": -1}"|n|30.05 -CHNH2|[CH][NH2]||"{""CHNH2"": 1, ""CH"": -1}"|n|29.042 -CH3NH|[CH3][NH][#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH]|"{""CH3NH"": 1, ""CH3"": -1}"|n|30.05 -CH2NH|[CH2][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH]|"{""CH2NH"": 1, ""CH2"": -1}"|n|29.042 -CHNH|[CH][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])&!$([CH2])]|[CH][NH]|"{""CHNH"": 1, ""CH"": -1}"|n|28.034 -CH3N|[CH3][NH0]([#6,Si;!$([CH](=O));!$(C(=O)[!N])])[#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH0]|"{""CH3N"": 1, ""CH3"": -1}"|n|29.042 -CH2N|[CH2][NH0]([$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])])[$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH0]|"{""CH2N"": 1, ""CH2"": -1}"|n|28.034 -ACNH2|[cH0][NH2]||"{""ACNH2"": 1, ""AC"": -1}"|n|28.034 -C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1||"{""C5H5N"": 1, ""ACH"": -5}"|n|79.102 -C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1||"{""C5H4N"": 1, ""ACH"": -4, ""AC"":-1}"|n|78.094 -C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1||"{""C5H3N"": 1, ""ACH"": -3, ""AC"": -2}"|n|77.086 -CH3CN|[CH3][C]#[N]||"{""CH3CN"": 1, ""CH3"": -1}"|n|41.053 -CH2CN|[CH2][C]#[N]||"{""CH2CN"": 1, ""CH2"": -1}"|n|40.045 -COOH|[CH0](*)(=O)(-[OH])|[CH0](=O)(-[OH])|"{""COOH"": 1, ""OH"": -1}"|n|45.017 -HCOOH|[CH](=O)(-[OH])||"{""HCOOH"": 1, ""OH"": -1}"|n|46.025 -CH2CL|[CH2](Cl)([!Cl])|[CH2][Cl]|"{""CH2CL"": 1, ""CH2"": -1}"|n|49.48 -CHCL|[CH](Cl)([!Cl])([!Cl])|[CH][Cl]|"{""CHCL"": 1, ""CH"": -1}"|n|48.472 -CCL|[CH0](Cl)([!Cl])([!Cl])([!Cl])|[CH0][Cl]|"{""CCL"": 1, ""C"": -1}"|n|47.464 -CH2CL2|[CH2](Cl)(Cl)||"{""CH2CL2"": 1, ""CH2"": -1}"|n|84.933 -CHCL2|[CH](Cl)(Cl)([!Cl])|[CH](Cl)(Cl)|"{""CHCL2"": 1, ""CH"": -1}"|n|83.925 -CCL2|[CH0](Cl)(Cl)([!Cl])([!Cl])|[CH0](Cl)(Cl)|"{""CCL2"": 1, ""C"": -1}"|n|82.917 -CHCL3|[CH](Cl)(Cl)(Cl)||"{""CHCL3"": 1, ""CH"": -1}"|n|119.378 -CCL3|[CH0](Cl)(Cl)(Cl)([!Cl])|[CH0](Cl)(Cl)(Cl)|"{""CCL3"": 1, ""C"": -1}"|n|118.37 -CCL4|[CH0](Cl)(Cl)(Cl)(Cl)||"{""CCL4"": 1, ""C"": -1}"|n|153.823 -ACCL|[cH0](Cl)||"{""ACCL"": 1, ""AC"": -1}"|n|47.464 -CH3NO2|[CH3]N(=O)(O)||"{""CH3NO2"": 1, ""CH3"": -1}"|n|61.04 -CH2NO2|[CH2]N(=O)(O)||"{""CH2NO2"": 1, ""CH2"": -1}"|n|60.032 -CHNO2|[CH]N(=O)(O)||"{""CHNO2"": 1, ""CH"": -1}"|n|59.024 -ACNO2|[cH0]N(=O)(O)||"{""ACNO2"": 1, ""AC"": -1}"|n|58.016 -CS2|C(=S)(=S)||"{""CS2"": 1}"|n|76.141 -CH3SH|[CH3][SH]||"{""CH3SH"": 1, ""CH3"": -1}"|n|48.1072 -CH2SH|[CH2][SH]||"{""CH2SH"": 1, ""CH2"": -1}"|n|47.0994 -FURFURAL|c1coc(c1)[CH]=O||"{""FURFURAL"": 1, ""ACH"": -3, ""HCO"": -1, ""CH"": 1, ""AC"": -1}"|n|96.0842 -DOH|[CH2]([OH])[CH2][OH]||"{""DOH"": 1, ""CH2"": -2, ""OH"": -2}"|n|62.0668 -I|[I][#6,Si]|[I]|"{""I"": 1}"|n|126.9 -BR|[Br][#6,Si]|[Br]|"{""BR"": 1}"|n|79.904 -CH=-C|[CH]#[C][*]|[CH]#[C]|"{""CH=-C"": 1}"|n|25.0298 -C=-C|[*][CH0]#[CH0][*]|[CH0]#[CH0]|"{""C=-C"": 1}"|n|24.022 -DMSO|[CH3][S](=O)[CH3]||"{""DMSO"": 1, ""CH3"": -1, ""CH3S"": -1}"|n|78.1328 -ACRY|[CH2]=[CH]-C#N||"{""ACRY"": 1, ""CH2=CH"": -1}"|n|53.0634 -CL-(C=C)|Cl[C]=[C]||"{""CL-(C=C)"": 1}"|n|35.453 -C=C|[CH0]=[CH0]||"{""C=C"": 1}"|n|24.022 -ACF|[cH0]F||"{""ACF"": 1, ""AC"": -1}"|n|31.009 -DMF|[CH3]N([CH3])[CH]=O||"{""DMF"": 1, ""CH3"": -2}"|n|73.0936 -HCON(CH2)2|[*][CH2]N([CH2][*])[CH]=O|[CH2]N([CH2])[CH]=O|"{""HCON(CH2)2"": 1, ""CH2"": -2}"|n|71.078 -CF3|[CH0](F)(F)(F)[!F]|[CH0](F)(F)(F)|"{""CF3"": 1, ""C"": -1}"|n|69.005 -CF2|[CH0](F)(F)([!F])[!F]|[CH0](F)(F)|"{""CF2"": 1, ""C"": -1}"|n|50.007 -CF|[CH0](F)([!F])([!F])[!F]|[CH0](F)|"{""CF"": 1, ""C"": -1}"|n|31.009 -COO|[C](*)(=O)[OH0][#6,Si]|[C](=O)[OH0]|"{""COO"": 1}"|n|44.009 -SIH3|[SiX4H3]||"{""SIH3"": 1}"|n|31.1094 -SIH2|[SiX4H2]||"{""SIH2"": 1}"|n|30.1016 -SIH|[SiX4H]||"{""SIH"": 1}"|n|29.0938 -SI|[SiX4H0]||"{""SI"": 1}"|n|28.086 -SIH2O|[SiX4H2]O[Si,CH0;!$([CH](=O));!$(C(=O)[!O])]|[SiX4H2]O|"{""SIH2O"": 1, ""SIH2"": -1}"|n|46.1006 -SIHO|[SiX4H]O[Si,CH0;!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H]O|"{""SIHO"": 1, ""SIH"": -1}"|n|45.0928 -SIO|[SiX4H0]O[Si,CH0;!$([SiH]);!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H0]O|"{""SIO"": 1, ""SI"": -1}"|n|44.085 -NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1||"{""NMP"": 1, ""CH2"": -2, ""AMCH3CH2"": -1}"|n|99.1312 -CCL3F|[CH0](Cl)(Cl)(Cl)(F)||"{""CCL3F"": 1, ""CCL3"": -1, ""CF"": -1, ""C"": 1}"|n|137.368 -CCL2F|[CH0](Cl)(Cl)(F)([!Cl&!F])|[CH0](Cl)(Cl)(F)|"{""CCL2F"": 1, ""CCL2"": -1, ""CF"": -1, ""C"": 1}"|n|101.915 -HCCL2F|[CH](Cl)(Cl)(F)||"{""HCCL2F"": 1, ""CHCL2"": -1}"|n|102.9228 -HCCLF|[CH](Cl)(F)([!Cl&!F])|[CH](Cl)(F)|"{""HCCLF"": 1, ""CHCL"": -1}"|n|67.4698 -CCLF2|[CH0](Cl)(F)(F)([!Cl&!F])|[CH0](Cl)(F)(F)|"{""CCLF2"": 1, ""CCL"": -1, ""CF2"": -1, ""C"": 1}"|n|85.46 -HCCLF2|[CH](Cl)(F)(F)||"{""HCCLF2"": 1, ""CHCL"": -1}"|n|86.4678 -CCLF3|[CH0](Cl)(F)(F)(F)||"{""CCLF3"": 1, ""CCL"": -1, ""CF3"": -1, ""C"": 1}"|n|104.458 -CCL2F2|[CH0](Cl)(Cl)(F)(F)||"{""CCL2F2"": 1, ""CCL2"": -1, ""CF2"": -1, ""C"": 1}"|n|120.913 -AMH2|[CH0](=O)([NH2])([!O])|[CH0](=O)([NH2])|"{""AMH2"": 1}"|n|44.0326 -AMHCH3|[CH0](=O)([NH][CH3])([!O])|[CH0](=O)([NH][CH3])|"{""AMHCH3"": 1, ""CH3"": -1}"|n|58.0592 -AMHCH2|[CH0](=O)([NH][CH2])([!O])|[CH0](=O)([NH][CH2])|"{""AMHCH2"": 1, ""CH2"": -1}"|n|57.0514 -AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])([!O])|[CH0](=O)([N]([CH3])[CH3])|"{""AM(CH3)2"": 1, ""CH3"": -2}"|n|72.0858 -AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])([!O])|[CH0](=O)([N]([CH3])[CH2])|"{""AMCH3CH2"": 1, ""CH3"": -1, ""CH2"": -1}"|n|71.078 -AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])([!O])|[CH0](=O)([N]([CH2])[CH2])|"{""AM(CH2)2"": 1, ""CH2"": -2}"|n|70.0702 -C2H5O2|[OH][CH2][CH2]O[#6,Si]|[OH][CH2][CH2]O|"{""C2H5O2"": 1, ""OH"": -1, ""CH2"": -1, ""CH2O"": -1}"|y|61.059 -C2H4O2|[OH][CH2][CH]O[#6,Si]|[OH][CH2][CH]O|"{""C2H4O2"": 1, ""OH"": -1, ""CHO"": -1, ""CH2"": -1}"|y|60.0512 -CH3S|[CH3][S][#6,Si]|[CH3][S]|"{""CH3S"": 1, ""CH3"": -1}"|n|47.0994 -CH2S|[CH2][S][$([#6,Si])&!$([CH3])]|[CH2][S]|"{""CH2S"": 1, ""CH2"": -1}"|n|46.0916 -CHS|[CH][S][$([#6,Si])&!$([CH3])&!$([CH2])]|[CH][S]|"{""CHS"": 1, ""CH"": -1}"|n|45.0838 -MORPH|[CH2]1O[CH2][CH2][NH][CH2]1||"{""MORPH"": 1, ""CH2"": -2, ""CH2O"": -1, ""CH2NH"": -1}"|n|87.1202 -C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1||"{""C4H4S"": 1, ""ACH"": -4}"|n|84.1402 -C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1||"{""C4H3S"": 1, ""ACH"": -3, ""AC"": -1}"|n|83.1324 -C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1||"{""C4H2S"": 1, ""ACH"": -2, ""AC"": -2}"|n|82.1246 -NCO|[NX2H0]=[CX2H0]=[OX1H0]||"{""NCO"": 1}"|n|42.017 -(CH2)2SU|[CH2]S(=O)(=O)[CH2]||"{""(CH2)2SU"": 1, ""CH2"": -1, ""CH2S"": -1}"|n|92.1162 -CH2CHSU|[CH2]S(=O)(=O)[CH]||"{""CH2CHSU"": 1, ""CH"": -1, ""CH2S"": -1}"|n|91.1084 -IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1||"{""IMIDAZOL"": 1, ""ACH"": -3}"|n|68.0782 -BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F||"{""BTI"": 1, ""CF3"": -2}"|n|279.91784 +group|smarts|molecular_weight +CH3|[CX4H3]|15.035 +CH2|[CX4H2]|14.027 +CH|[CX4H]|13.019 +C|[CX4H0]|12.011 +CH2=CH|[CH2]=[CH]|27.046 +CH=CH|[CH]=[CH]|26.038 +CH2=C|[CH2]=[CH0]|26.038 +CH=C|[CH]=[CH0]|25.03 +ACH|[cH]|13.019 +AC|[cH0]|12.011 +ACCH3|[c][CX4H3]|27.046 +ACCH2|[c][CX4H2]|26.038 +ACCH|[c][CX4H]|25.03 +OH|[OH]|17.007 +CH3OH|[CH3][OH]|32.042 +H2O|[OH2]|18.015 +ACOH|[cH0][OH]|29.018 +CH3CO|[CH3]C(=O)|43.045 +CH2CO|[CH2]C(=O)|42.037 +HCO|[CH](=O)|29.018 +CH3COO|[CH3][C](=O)[OH0]|59.044 +CH2COO|[CH2][C](=O)[OH0]|58.036 +HCOO|[CH](=O)[OH0]|45.017 +CH3O|[CH3][OH0]|31.034 +CH2O|[CH2][OH0]|30.026 +CHO|[CH][OH0]|29.018 +THF|[CH2;R][O;R]|30.026 +CH3NH2|[CH3][NH2]|31.058 +CH2NH2|[CH2][NH2]|30.05 +CHNH2|[CH][NH2]|29.042 +CH3NH|[CH3][NH]|30.05 +CH2NH|[CH2][NH]|29.042 +CHNH|[CH][NH]|28.034 +CH3N|[CH3][NH0]|29.042 +CH2N|[CH2][NH0]|28.034 +ACNH2|[cH0][NH2]|28.034 +C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1|79.102 +C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1|78.094 +C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1|77.086 +CH3CN|[CH3][C]#[N]|41.053 +CH2CN|[CH2][C]#[N]|40.045 +COOH|[CH0](=O)[OH]|45.017 +HCOOH|[CH](=O)[OH]|46.025 +CH2CL|[CH2][Cl]|49.48 +CHCL|[CH][Cl]|48.472 +CCL|[CH0][Cl]|47.464 +CH2CL2|[CH2](Cl)(Cl)|84.933 +CHCL2|[CH](Cl)(Cl)|83.925 +CCL2|[CH0](Cl)(Cl)|82.917 +CHCL3|[CH](Cl)(Cl)(Cl)|119.378 +CCL3|[CH0](Cl)(Cl)(Cl)|118.37 +CCL4|[CH0](Cl)(Cl)(Cl)(Cl)|153.823 +ACCL|[cH0](Cl)|47.464 +CH3NO2|[CH3]N(=O)(O)|61.04 +CH2NO2|[CH2]N(=O)(O)|60.032 +CHNO2|[CH]N(=O)(O)|59.024 +ACNO2|[cH0]N(=O)(O)|58.016 +CS2|C(=S)(=S)|76.141 +CH3SH|[CH3][SH]|48.1072 +CH2SH|[CH2][SH]|47.0994 +FURFURAL|c1coc(c1)[CH]=O|96.0842 +DOH|[CH2]([OH])[CH2][OH]|62.0668 +I|[I]|126.9 +BR|[Br]|79.904 +CH=-C|[CH]#[C]|25.0298 +C=-C|[CH0]#[CH0]|24.022 +DMSO|[CH3][S](=O)[CH3]|78.1328 +ACRY|[CH2]=[CH]-C#N|53.0634 +CL-(C=C)|Cl[C]=[C]|35.453 +C=C|[CH0]=[CH0]|24.022 +ACF|[cH0]F|31.009 +DMF|[CH3]N([CH3])[CH]=O|73.0936 +HCON(CH2)2|[CH2]N([CH2])[CH]=O|71.078 +CF3|[CH0](F)(F)(F)|69.005 +CF2|[CH0](F)(F)|50.007 +CF|[CH0](F)|31.009 +COO|[C](=O)[OH0]|44.009 +SIH3|[SiX4H3]|31.1094 +SIH2|[SiX4H2]|30.1016 +SIH|[SiX4H]|29.0938 +SI|[SiX4H0]|28.086 +SIH2O|[SiX4H2][OH0]|46.1006 +SIHO|[SiX4H][OH0]|45.0928 +SIO|[SiX4H0][OH0]|44.085 +NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1|99.1312 +CCL3F|[CH0](Cl)(Cl)(Cl)(F)|137.368 +CCL2F|[CH0](Cl)(Cl)(F)|101.915 +HCCL2F|[CH](Cl)(Cl)(F)|102.9228 +HCCLF|[CH](Cl)(F)|67.4698 +CCLF2|[CH0](Cl)(F)(F)|85.46 +HCCLF2|[CH](Cl)(F)(F)|86.4678 +CCLF3|[CH0](Cl)(F)(F)(F)|104.458 +CCL2F2|[CH0](Cl)(Cl)(F)(F)|120.913 +AMH2|[CH0](=O)([NH2])|44.0326 +AMHCH3|[CH0](=O)([NH][CH3])|58.0592 +AMHCH2|[CH0](=O)([NH][CH2])|57.0514 +AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])|72.0858 +AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])|71.078 +AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])|70.0702 +C2H5O2|[OH][CH2][CH2][OH0]|61.059 +C2H4O2|[OH][CH2][CH][OH0]|60.0512 +CH3S|[CH3][SH0]|47.0994 +CH2S|[CH2][SH0]|46.0916 +CHS|[CH][SH0]|45.0838 +MORPH|[CH2]1O[CH2][CH2][NH][CH2]1|87.1202 +C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1|84.1402 +C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1|83.1324 +C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1|82.1246 +NCO|[NX2H0]=[CX2H0]=[OX1H0]|42.017 +(CH2)2SU|[CH2]S(=O)(=O)[CH2]|92.1162 +CH2CHSU|[CH2]S(=O)(=O)[CH]|91.1084 +IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1|68.0782 +BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F|279.91784 diff --git a/ugropy/refactor/__init__.py b/ugropy/refactor/__init__.py index 554fdca..9f6de87 100644 --- a/ugropy/refactor/__init__.py +++ b/ugropy/refactor/__init__.py @@ -1,6 +1,6 @@ from .fragment import Fragment from .fragmentation_model import FragmentationModel -from .fragmentation_unifac import unifac +from .fragmentation_unifac import unifac2 __all__ = ["Fragment", "FragmentationModel", "unifac"] \ No newline at end of file diff --git a/ugropy/refactor/fragmentation_model.py b/ugropy/refactor/fragmentation_model.py index 19e8ec5..5d26d0e 100644 --- a/ugropy/refactor/fragmentation_model.py +++ b/ugropy/refactor/fragmentation_model.py @@ -1,21 +1,118 @@ from typing import List +import itertools + from ugropy.refactor.fragment import Fragment from rdkit import Chem +import numpy as np + +import pulp + class FragmentationModel: def __init__(self, fragments: List[Fragment]): self.fragments = fragments def detect_fragments(self, molecule: Chem.rdchem.Mol): - detected = {} + batch = DetectionBatch(molecule) for fragment in self.fragments: match = molecule.GetSubstructMatches(fragment.mol_object) if match: - detected[fragment.name] = match + batch.add_fragment(fragment.name, match) + return batch + +class DetectionBatch: + def __init__(self, molecule: Chem.rdchem.Mol): + self.n = molecule.GetNumAtoms() + self.fragments = {} + self.overlaped_fragments = {} + self.selected_fragments = [] + self.overlaped_atoms = [] + self.atoms_matrix = np.array([]) + self.solution_atoms = {} + self.solution = {} + + self.has_overlap = False + + + def get_groups(self): + self.build_overlap_matrix() + self.get_overlaped_fragments() + self.solve_overlap() + + for frag in self.overlaped_fragments.keys(): + if frag not in self.selected_fragments: + self.fragments.pop(frag) + + for frag in self.fragments.keys(): + name = frag.split("_")[0] + + if name not in self.solution_atoms.keys(): + self.solution_atoms[name] = [self.fragments[frag]] + else: + self.solution_atoms[name].append(self.fragments[frag]) + + for frag in self.solution_atoms.keys(): + self.solution[frag] = len(self.solution_atoms[frag]) + + + def add_fragment(self, fragment_name: str, fragments: tuple): + for i, f in enumerate(fragments): + self.fragments[f"{fragment_name}_{i}"] = list(f) + + def build_overlap_matrix(self): + self.atoms_matrix = np.zeros((len(self.fragments), self.n)) + + for i, fragment in enumerate(self.fragments.values()): + self.atoms_matrix[i, fragment] = 1 + + def get_overlaped_fragments(self): + overlap = np.sum(self.atoms_matrix, axis=0) + self.overlaped_atoms = np.argwhere(overlap > 1).flatten() + + for name, frag in self.fragments.items(): + if np.isin(frag, self.overlaped_atoms).any(): + self.overlaped_fragments[name] = frag + + def solve_overlap(self): + universe = set(self.overlaped_atoms) + + all_elements = set(itertools.chain.from_iterable(self.overlaped_fragments.values())) + + universe.update(all_elements) + + problem = pulp.LpProblem("Set_Cover_Problem", pulp.LpMinimize) + + n_frag = len(self.overlaped_fragments) + + x = pulp.LpVariable.dicts("x", range(n_frag), cat="Binary") + + problem += pulp.lpSum([x[i] for i in range(n_frag)]) + + for elem in universe: + sum_list = [] + for i, subset in enumerate(self.overlaped_fragments.values()): + if elem in subset: + sum_list.append(x[i]) + + # print(f"Restricción para el elemento {elem}: {sum_list} == 1") + problem += pulp.lpSum(sum_list) == 1 + + solver = pulp.getSolver('PULP_CBC_CMD', msg=False) + + problem.solve(solver) + + selected_subsets = [name for i, name in enumerate(self.overlaped_fragments.keys()) if pulp.value(x[i]) == 1] + + self.selected_fragments = selected_subsets + + + + - return detected \ No newline at end of file + + \ No newline at end of file diff --git a/ugropy/refactor/fragmentation_unifac.py b/ugropy/refactor/fragmentation_unifac.py index 0718e57..2edaa12 100644 --- a/ugropy/refactor/fragmentation_unifac.py +++ b/ugropy/refactor/fragmentation_unifac.py @@ -3,20 +3,24 @@ _unifac_fragments = [ - Fragment("CH3", "[CH3]"), - Fragment("CH2", "[CH2]"), - Fragment("CH", "[CH]"), - Fragment("C", "[CH0]"), + Fragment("CH3", "[CX4H3]"), + Fragment("CH2", "[CX4H2]"), + Fragment("CH", "[CX4H]"), + Fragment("C", "[CX4H0]"), Fragment("OH", "[OH]"), - Fragment("H2O", "O"), + Fragment("H2O", "[OH2]"), Fragment("ACH", "[cH]"), Fragment("AC", "[cH0]"), - Fragment("ACCH3", "[cH0][CH3]"), - Fragment("ACCH2", "[cH0][CH2]"), - Fragment("ACCH", "[cH0][CH]"), + Fragment("ACCH3", "[cH0][CX4H3]"), + Fragment("ACCH2", "[cH0][CX4H2]"), + Fragment("ACCH", "[cH0][CX4H]"), + Fragment("ACOH", "[cH0][OH]"), + Fragment("CH3O", "[CH3][OH0]"), + Fragment("CH2O", "[CH2][OH0]"), + Fragment("CHO", "[CH][OH0]"), + Fragment("COO", "[CX3H0](=[OH0])[OH0]"), + Fragment("ACNH2", "[cH0][NH2]"), ] -unifac = FragmentationModel(_unifac_fragments) - -# mol = Chem.MolFromSmiles("C(C1=CC=CC=C1)C1=CC=CC=C1") \ No newline at end of file +unifac2 = FragmentationModel(_unifac_fragments) diff --git a/ugropy/refactor/prueba.ipynb b/ugropy/refactor/prueba.ipynb new file mode 100644 index 0000000..ddb59a3 --- /dev/null +++ b/ugropy/refactor/prueba.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from fragmentation_unifac import unifac2\n", + "\n", + "from ugropy import get_groups, unifac\n", + "from ugropy.core.fragmentation_object import Fragmentation\n", + "\n", + "from rdkit import Chem\n", + "\n", + "import numpy as np\n", + "\n", + "from rdkit.Chem.Draw import rdMolDraw2D" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "smiles = \"CCCC1=C(COC(C)(C)COC(=O)OCC)C=C(CC2=CC=CC=C2)C=C1\"\n", + "#smiles = \"C13=C(C(=C(C(=C1C)C)CC2=C(C(=C(C(=C2C)CC)O[H])N([H])[H])C)C)CCCC3\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.23 ms ± 260 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "mol = Chem.MolFromSmiles(smiles)\n", + "sol = unifac2.detect_fragments(mol)\n", + "sol.get_groups()\n", + "sol.solution" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12.2 ms ± 132 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "sols = get_groups(unifac, smiles, \"smiles\").subgroups\n", + "sols\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sols == sol.solution" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "d = rdMolDraw2D.MolDraw2DCairo(1000, 1000) # or MolDraw2DSVG to get SVGs\n", + "\n", + "d.drawOptions().addAtomIndices = True\n", + "d.DrawMolecule(mol)\n", + "d.FinishDrawing()\n", + "\n", + "d.WriteDrawingText('atom_annotation_1.png') " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ugropy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}