Skip to content

Commit

Permalink
Bug in ring index bumping fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
Old-Shatterhand committed Nov 6, 2024
1 parent 36f7e23 commit 40db564
Show file tree
Hide file tree
Showing 7 changed files with 1,663 additions and 711 deletions.
1,756 changes: 1,337 additions & 419 deletions examples/fgs.ipynb

Large diffs are not rendered by default.

23 changes: 20 additions & 3 deletions glyles/glycans/mono/monomer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
GlycanLexer.MOD = GlycanLexer.QMARK + 1


def shift(d, offset):
    """Return the SMILES ring-closure label for ``int(d) + offset``.

    Args:
        d: The current ring index; anything ``int()`` accepts, typically a
            string of decimal digits taken from a SMILES string.
        offset: Integer added to the ring index.

    Returns:
        The bumped index as a string. A single-digit result is returned
        bare (``"7"``), a two-digit result gets the ``%`` prefix (``"%12"``),
        and a result of 100 or more is written as ``"%(123)"``.

    Raises:
        ValueError: If ``d`` cannot be converted to an integer.
    """
    bumped = int(d) + offset
    text = str(bumped)
    if len(text) == 1:
        return text
    if bumped >= 100:
        # "%" consumes exactly two digits, so "%100" would be read as ring
        # closure 10 followed by ring closure 0. The parenthesised form
        # (an RDKit SMILES extension) keeps the number intact.
        return "%(" + text + ")"
    return "%" + text


class Monomer:

def __init__(self, origin=None, **kwargs):
Expand Down Expand Up @@ -319,9 +326,19 @@ def to_smiles(self, ring_index, root_idx=None, root_id=None):
root_id = self.__check_root_id(root_id)

smiles = MolToSmiles(self.get_structure(), rootedAtAtom=root_id)
for match in reversed(list(re.finditer(r'[a-zA-GI-Z|\]]\d+', smiles))):
num = int(smiles[match.start() + 1: match.end()]) + ring_index
smiles = smiles[:match.start() + 1] + ("" if num < 10 else "%") + str(num) + smiles[match.end():]

# bump ring indices to avoid clashes with outer rings
# for match in reversed(list(re.finditer(r'[a-zA-GI-Z0-9|\]]\d', smiles))):
# num = int(smiles[match.start() + 1: match.end()]) + ring_index * 10
# smiles = smiles[:match.start() + 1] + "%" + str(num) + smiles[match.end():]
for m in reversed(list(iter(re.finditer(r'[A-G|I-Za-z|\]|%]\d+', smiles)))):
start, end = m.start(), m.end()
match = smiles[start:end]
if match[0] == "%":
new_str = shift(match[1:], ring_index)
else:
new_str = match[0] + "".join(shift(d, ring_index) for d in match[1:])
smiles = smiles[:start] + new_str + smiles[end:]
return smiles

def __check_root_id(self, root_id):
Expand Down
42 changes: 28 additions & 14 deletions glyles/glycans/mono/reactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"TIPS": "[Si](C(C)C)(C(C)C)C(C)C",
"TMS": "[Si](C)(C)C",
"Thr": "N[C@@H](C(=O)O)[C@@H](O)C",
"triN": "N=[N+]=[N-]",
"Troc": "C(=O)OCC(Cl)(Cl)Cl",
"Ts": "S(=O)(=O)c2ccc(C)cc2",
"Ulo": "OC(c2ccccc2)(c2ccccc2(Cl))CCN(C)C",
Expand Down Expand Up @@ -135,20 +136,33 @@
"tdPam": "OC(=O)CCCCCCC/C=C/CCCCCC",
"tBu": "OC(C)(C)C",

# Methanol, Ethanol, Propanol, ...
"Me": "O" + "C" * 1,
"Et": "O" + "C" * 2,
"Pr": "O" + "C" * 3,
"Prop": "O" + "C" * 3,
"Bu": "O" + "C" * 4,
"Pe": "O" + "C" * 5,
"Hx": "O" + "C" * 6,
"Hp": "O" + "C" * 7,
"Oc": "O" + "C" * 8,
"Nn": "O" + "C" * 9,
"Dec": "O" + "C" * 10,
"Und": "O" + "C" * 11,
"Dod": "O" + "C" * 12,
# Methane, Ethane, Propane, ...
"Me": "C" * 1,
"Et": "C" * 2,
"Pr": "C" * 3,
"Prop": "C" * 3,
"Bu": "C" * 4,
"Pe": "C" * 5,
"Hx": "C" * 6,
"Hp": "C" * 7,
"Oc": "C" * 8,
"Nn": "C" * 9,
"Dec": "C" * 10,
"Und": "C" * 11,
"Dod": "C" * 12,
# "Me": "O" + "C" * 1,
# "Et": "O" + "C" * 2,
# "Pr": "O" + "C" * 3,
# "Prop": "O" + "C" * 3,
# "Bu": "O" + "C" * 4,
# "Pe": "O" + "C" * 5,
# "Hx": "O" + "C" * 6,
# "Hp": "O" + "C" * 7,
# "Oc": "O" + "C" * 8,
# "Nn": "O" + "C" * 9,
# "Dec": "O" + "C" * 10,
# "Und": "O" + "C" * 11,
# "Dod": "O" + "C" * 12,
# ... and their acids
"But": "OC(=O)" + "C" * 2,
"Vl": "OC(=O)" + "C" * 4,
Expand Down
1 change: 1 addition & 0 deletions glyles/glycans/poly/glycan.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ def __parse(self):
if not self.tree_only and self.tree_full and self.full:
self.glycan_smiles = Merger(self.factory).merge(self.parse_tree, self.root_orientation, start=self.start)
# catch any exception at glycan level to not destroy the whole pipeline because of one mis-formed glycan
print(self.glycan_smiles)

except ParseError as e:
msg = e.__str__().replace("\n", " ")
Expand Down
18 changes: 9 additions & 9 deletions glyles/grammar/Glycan.g4
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ PHOSPHOR:
'P';
FG:
'Ceroplastic' | 'Lacceroic' | '3oxoMyr' | 'Psyllic' | 'Geddic' | 'Alloc' | 'Allyl' | 'Phthi' | 'TBDPS' | 'aLnn'
| 'ClAc' | 'Coum' | 'eSte' | 'Fmoc' | 'gLnn' | 'HSer' | 'Pico' | 'Prop' | 'TIPS' | 'Troc' | 'Ach' | 'Aep' | 'Ala'
| 'Ang' | 'Asp' | 'Beh' | 'Boc' | 'But' | 'Cbz' | 'Cct' | 'Cer' | 'Cet' | 'Cho' | 'cHx' | 'Cin' | 'Crt' | 'Cys'
| 'DCA' | 'Dce' | 'Dco' | 'Dec' | 'Dhp' | 'DMT' | 'Dod' | 'Etg' | 'Etn' | 'EtN' | 'Fer' | 'Glu' | 'Gly' | 'Gro'
| 'Hpo' | 'Hse' | 'Hxo' | 'Lac' | 'Lau' | 'Leu' | 'Lev' | 'Lin' | 'Lys' | 'Mal' | 'Mar' | 'Mel' | 'MMT' | 'MOM'
| 'Mon' | 'Myr' | 'NAP' | 'Ner' | 'Nno' | 'Non' | 'Oco' | 'Ole' | 'oNB' | 'Orn' | 'Pam' | 'Pic' | 'Piv' | 'PMB'
| 'PMP' | 'Poc' | 'Pro' | 'Pyr' | 'Ser' | 'Sin' | 'Ste' | 'TBS' | 'tBu' | 'TCA' | 'TES' | 'TFA' | 'THP' | 'Thr'
| 'Tig' | 'TMS' | 'Udo' | 'Ulo' | 'ulo' | 'Und' | 'Vac' | 'Ac' | 'Al' | 'Am' | 'Bn' | 'Br' | 'Bu' | 'Bz' | 'Cl'
| 'Cm' | 'DD' | 'DL' | 'en' | 'Et' | 'Fo' | 'Gc' | 'Hp' | 'Hx' | 'LD' | 'LL' | 'Me' | 'N3' | 'Nn' | 'Ns' | 'Oc'
| 'Pe' | 'Ph' | 'Pp' | 'Pr' | 'Tf' | 'Tr' | 'Ts' | 'Vl' | 'A' | 'F' | 'I' | 'S';
| 'ClAc' | 'Coum' | 'eSte' | 'Fmoc' | 'gLnn' | 'HSer' | 'Pico' | 'Prop' | 'TIPS' | 'triN' | 'Troc' | 'Ach' | 'Aep'
| 'Ala' | 'Ang' | 'Asp' | 'Beh' | 'Boc' | 'But' | 'Cbz' | 'Cct' | 'Cer' | 'Cet' | 'Cho' | 'cHx' | 'Cin' | 'Crt'
| 'Cys' | 'DCA' | 'Dce' | 'Dco' | 'Dec' | 'Dhp' | 'DMT' | 'Dod' | 'Etg' | 'Etn' | 'EtN' | 'Fer' | 'Glu' | 'Gly'
| 'Gro' | 'Hpo' | 'Hse' | 'Hxo' | 'Lac' | 'Lau' | 'Leu' | 'Lev' | 'Lin' | 'Lys' | 'Mal' | 'Mar' | 'Mel' | 'MMT'
| 'MOM' | 'Mon' | 'Myr' | 'NAP' | 'Ner' | 'Nno' | 'Non' | 'Oco' | 'Ole' | 'oNB' | 'Orn' | 'Pam' | 'Pic' | 'Piv'
| 'PMB' | 'PMP' | 'Poc' | 'Pro' | 'Pyr' | 'Ser' | 'Sin' | 'Ste' | 'TBS' | 'tBu' | 'TCA' | 'TES' | 'TFA' | 'THP'
| 'Thr' | 'Tig' | 'TMS' | 'Udo' | 'Ulo' | 'ulo' | 'Und' | 'Vac' | 'Ac' | 'Al' | 'Am' | 'Bn' | 'Br' | 'Bu' | 'Bz'
| 'Cl' | 'Cm' | 'DD' | 'DL' | 'en' | 'Et' | 'Fo' | 'Gc' | 'Hp' | 'Hx' | 'LD' | 'LL' | 'Me' | 'N3' | 'Nn' | 'Ns'
| 'Oc' | 'Pe' | 'Ph' | 'Pp' | 'Pr' | 'Tf' | 'Tr' | 'Ts' | 'Vl' | 'A' | 'F' | 'I' | 'S';
ANHYDRO:
'Anhydro';
HEAD:
Expand Down
2 changes: 1 addition & 1 deletion glyles/grammar/GlycanLexer.interp

Large diffs are not rendered by default.

Loading

0 comments on commit 40db564

Please sign in to comment.