diff --git a/lib.typ b/lib.typ index 7d1f5c1..0a36e4e 100644 --- a/lib.typ +++ b/lib.typ @@ -1,6 +1,7 @@ #import "@preview/cetz:0.4.1" #import "src/default.typ": default #import "src/utils/utils.typ" +#import "src/elements/molecule/molecule.typ": molecule #import "src/drawer.typ" #import "src/drawer.typ": skeletize, draw-skeleton, skeletize-config, draw-skeleton-config #import "src/elements/links.typ": * @@ -78,7 +79,6 @@ ), ) } -#let molecule(name: none, links: (:), lewis: (), vertical: false, mol) = fragment(name: name, links: links, lewis: lewis, vertical: vertical, mol) /// === Hooks /// Create a hook in the fragment. It allows to connect links to the place where the hook is. diff --git a/src/elements/molecule/generator.typ b/src/elements/molecule/generator.typ new file mode 100644 index 0000000..fadcdbc --- /dev/null +++ b/src/elements/molecule/generator.typ @@ -0,0 +1,163 @@ +#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left, cram-hollow-right, cram-hollow-left + +// ============================ Atom Processing ============================
+
+/// Convert a parsed atom structure into Typst content.
+///
+/// - parts (dictionary): parsed node whose `type` is "atoms", "abbreviation",
+///   "math-text", "element-group", "parenthetical" or "complex"
+///
+/// Returns: an array of content for "atoms", "abbreviation" and "math-text"
+/// (the three alternatives of fragment-content-parser in parser.typ), a single
+/// content value for "element-group", "parenthetical" and "complex", and an
+/// error string for any other type.
+#let process-atom(parts) = {
+  let type = parts.type
+
+  if type == "atoms" {
+    let base = parts.parts.map(process-atom)
+    if parts.charge != none {
+      // The charge is attached to the joined group, so the result is one item.
+      (math.attach(base.join(), tr: eval("$" + parts.charge + "$")),)
+    } else {
+      base
+    }
+  } else if type == "abbreviation" {
+    // Wrapped in an array like the "atoms" case: generate_fragment calls
+    // `node.atoms.len()`, which a bare content value does not provide.
+    (text(parts.value),)
+  } else if type == "math-text" {
+    // NOTE(review): evaluated in eval's default (code) mode although the parser
+    // takes this text from between `$...$` — confirm which mode is intended.
+    (eval(parts.value),)
+  } else if type == "element-group" {
+    math.attach(parts.element, tl: [#parts.isotope], br: [#parts.subscript])
+  } else if type == "parenthetical" {
+    let inner = process-atom(parts.atoms)
+    math.attach([(#inner.join())], br: [#parts.subscript])
+  } else if type == "complex" {
+    let inner = process-atom(parts.atoms)
+    [\[#inner.join()\]]
+  } else {
+    "unknown type: " + type
+  }
+}
+
+/// Extract element names from parsed content and find the first
non-H index +/// Returns the index of the first non-H element (0 if all are H or empty) +#let calc-main-index(parts) = { + // Extract element names recursively + let extract(p) = { + if p.type == "atoms" { p.parts.map(extract).flatten() } + else if p.type == "element-group" { (p.element,) } + else if p.type == "parenthetical" or p.type == "complex" { extract(p.atoms) } + else if p.type == "abbreviation" or p.type == "math-text" { (p.value,) } + else { () } + } + let elements = extract(parts) + + // Find first non-H index + for (idx, el) in elements.enumerate() { + if el != "H" { return idx } + } + 0 +} + +// ============================ Molecule ============================ + +#let generate_fragment(node) = ( + ( + type: "fragment", + atoms: node.atoms, + name: node.at("name", default: none), + links: node.at("links", default: (:)), + lewis: node.options.at("lewis", default: ()), + vertical: node.options.at("vertical", default: false), + count: node.atoms.len(), + colors: node.options.at("colors", default: none), + label: node.at("name", default: none), + ..node.options, + ), +) + +#let generate_bond(bond, angle, options) = { + let symbol = bond.symbol + let name = bond.at("name", default: none) + let absolute = if angle != none { angle } else { bond.at("absolute", default: none) } + let relative = bond.at("relative", default: none) + let options = if options != (:) { options } else { bond.options } + + let bond-fn = if symbol == "-" { + single + } else if symbol == "=" { + double + } else if symbol == "#" { + triple + } else if symbol == ">" { + cram-filled-right + } else if symbol == "<" { + cram-filled-left + } else if symbol == ":>" { + cram-dashed-right + } else if symbol == "<:" { + cram-dashed-left + } else if symbol == "|>" { + cram-hollow-right + } else if symbol == "<|" { + cram-hollow-left + } else { + single + } + + if absolute != none and relative != none { + bond-fn(relative: relative, absolute: absolute, name: name, ..options) + } else if relative != 
none { + bond-fn(relative: relative, name: name, ..options) + } else if absolute != none { + bond-fn(absolute: absolute, name: name, ..options) + } else { + bond-fn(name: name, ..options) + } +} + +#let generate_branch(bond, body) = ( + ( + type: "branch", + body: {bond; body}, + args: (:), + ), +) + +#let generate_cycle(cycle, body) = ( + type: "cycle", + faces: cycle.faces, + body: body, + args: (:), +)
+
+// NOTE(review): `generate_unit` is not defined or imported in this file, and
+// `generate_bond` is called below with one argument although it declares three
+// positional parameters (bond, angle, options). The transformer code in this
+// diff builds molecules through `transform_molecule` instead — confirm whether
+// this function is still needed.
+#let generate_molecule(molecule) = {
+  if molecule == none { return () }
+  if type(molecule) == array { return molecule }
+  if molecule.type != "molecule" { return () }
+
+  let elements = ()
+  elements += generate_unit(molecule.first)
+  for item in molecule.rest {
+    elements += generate_bond(item.bond)
+    elements += generate_unit(item.unit)
+  }
+  return elements
+}
+
+// ============================ Reaction ============================
+
+/// Build the operator element for a parsed reaction operator.
+///
+/// - operator (dictionary): parsed operator; `op` is a symbol string or a
+///   math-text dictionary, `condition-before` / `condition-after` are
+///   `(type: "condition", text: ...)` dictionaries or none
+///
+/// Returns: dictionary with `type: "operator"`, `name`, `op` (stretched symbol
+/// with the conditions attached above and below) and `margin`.
+#let generate_operator(operator) = {
+  // A math-text operator arrives as a dictionary; plain symbols are strings.
+  let symbol = if type(operator.op) == dictionary {
+    operator.op.value
+  } else {
+    operator.op
+  }
+
+  let op = if symbol == "->" {
+    sym.arrow.r
+  } else if symbol == "<->" {
+    sym.arrow.l.r
+  } else if symbol == "<=>" {
+    sym.harpoons.ltrb
+  } else {
+    eval("$" + symbol + "$")
+  }
+
+  // Conditions live on the operator itself; only their text is displayed.
+  let condition-text(cond) = if cond == none {
+    none
+  } else {
+    cond.at("text", default: none)
+  }
+  let above = condition-text(operator.at("condition-before", default: none))
+  let below = condition-text(operator.at("condition-after", default: none))
+
+  op = math.attach(
+    math.stretch(op, size: 100% + 2em),
+    t: [#above], b: [#below]
+  )
+
+  return (
+    type: "operator",
+    name: none,
+    op: op,
+    margin: 0.7em,
+  )
+}
diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ new file mode 100644 index 0000000..1a4aed9 --- /dev/null +++ b/src/elements/molecule/iupac-angle.typ @@ -0,0 +1,149 @@ +// relative angles +#let IUPAC_ANGLES = ( + main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg } - 60deg, + zigzag: idx => if calc.rem(idx, 2) == 1 { 60deg } else { -60deg }, + incoming: -180deg, + straight: 0deg, + + sp3: (60deg, -60deg, -120deg, -180deg), + sp2: (60deg, -60deg, -180deg), + sp: (0deg, -180deg), + + branch_angles: (n, idx) => 180deg - (idx + 1) * 360deg / n, + cycle_edge_angles: n => 360deg / n, + cycle_branch_angles: n => -150deg + 180deg
/ n, +) + +// Calculate the angles for the hybridization of the bonds +#let hybridization_angles(bonds, branches_len) = { + let n = bonds.len() + let triple = bonds.filter(b => b.symbol == "#").len() + let double = bonds.filter(b => b.symbol == "=").len() + let other = bonds.filter(b => b.symbol != "#" and b.symbol != "=").len() + + if n == 2 and (triple >= 1 or double >= 2) { IUPAC_ANGLES.sp } + else if branches_len <= 1 and (double >= 1 or other >= 2) { IUPAC_ANGLES.sp2 } + else if branches_len <= 2 { IUPAC_ANGLES.sp3 } + else { range(n).map(i => (IUPAC_ANGLES.branch_angles)(n, i)) } +} + +#let bond-angle(ctx, bond) = { + let (n, idx) = ctx.position.last() + + let angle = if ctx.parent_type == "unit" or ctx.parent_type == none { + ctx.current_angle + (IUPAC_ANGLES.zigzag)(idx) + } else if ctx.parent_type == "cycle" { + let (faces, _) = ctx.position.at(-2) + ctx.current_angle + (IUPAC_ANGLES.cycle_edge_angles)(faces) + } else if ctx.parent_type == "branch" { + ctx.current_angle + } else { + panic("Unknown parent type: " + ctx.parent_type) + } + + return (ctx + (current_angle: angle), angle) +} + +// Calculate relative angle for a ring attached to a main chain unit +// Returns (angle, absolute) tuple, or (none, false) if default behavior should be used +#let ring-angle(ctx, ring, rings_count, idx) = { + if ctx.parent_type == "cycle" { + // Inside a cycle - use context info for polycyclic vs hetero detection + let outer_faces = ctx.at("outer_cycle_faces", default: none) + let outer_bonds = ctx.at("outer_cycle_body_len", default: none) + + // Also check inner ring's bonds vs faces + let inner_faces = ring.faces + let inner_bonds = if ring.body != none and ring.body.type == "molecule" and ring.body.rest != none { + ring.body.rest.len() + } else { 0 } + + // Polycyclic: outer or inner has fewer bonds than faces + let is_polycyclic = outer_bonds < outer_faces or inner_bonds < inner_faces + + if is_polycyclic { + (none, false) + } else { + // Hetero - use branch angle + 
((IUPAC_ANGLES.cycle_branch_angles)(outer_faces), false) + } + } else if ctx.prev_bond != none and ctx.next_bond != none { + // MIDDLE of chain - ring goes as a branch + let base = 0deg + if rings_count > 1 { + base = base + 60deg * (idx - (rings_count - 1) / 2) + } + (base, false) + } else if ctx.prev_bond != none or ctx.next_bond != none { + // START or END of chain - ring extends parallel to chain direction + let (_, chain_idx) = ctx.position.last() + let edge = 180deg / ring.faces + let base = if ctx.prev_bond == none { + // START: extend opposite to chain direction + ctx.current_angle + 150deg + edge + } else { + // END: continue in chain direction + // Uses main_chain_initial pattern for consistency + let offset = 120deg + (IUPAC_ANGLES.main_chain_initial)(chain_idx) + (IUPAC_ANGLES.zigzag)(chain_idx) / 2 + ctx.current_angle - offset + edge + } + if rings_count > 1 { + base = base + 60deg * (idx - (rings_count - 1) / 2) + } + (base, false) + } else { + (none, false) + } +} + +#let branch-angles(ctx, branches) = { + let (n, idx) = ctx.position.last() + + if branches.len() == 0 { return () } + + if ctx.parent_type == "cycle" { + let (faces, _) = ctx.position.at(-2) + let base_angle = (IUPAC_ANGLES.cycle_branch_angles)(faces) + + let branch_count = branches.len() + if branch_count == 1 { + return (base_angle,) + } + + // For multiple branches, spread them symmetrically + let spread = 60deg + return range(branch_count).map(i => { + base_angle + spread * (i - (branch_count - 1) / 2) + }) + } + + let bonds = branches.map(b => b.bond) + if ctx.prev_bond != none { bonds.push(ctx.prev_bond) } + if ctx.next_bond != none { bonds.push(ctx.next_bond) } + + let angles = hybridization_angles(bonds, branches.len()).filter( + angle => (ctx.prev_bond == none or angle != IUPAC_ANGLES.incoming) + and (ctx.next_bond == none or angle != (IUPAC_ANGLES.zigzag)(idx + 1)) + ) + + // first branches of the main chain + // Offset by 180deg + zigzag to face opposite to outgoing direction 
+ if ctx.prev_bond == none and ctx.parent_type == none { + let outgoing = (IUPAC_ANGLES.zigzag)(idx + 1) + angles = angles.map(angle => angle + 180deg + outgoing) + } + + return angles +} + +#let initial-angle(ctx, molecule) = { + return (IUPAC_ANGLES.main_chain_initial)(molecule.rest.len()) +} + +/// Check if angle is vertical (around 90deg or 270deg) +/// Used to determine when to connect to main atom instead of H +#let is-vertical-angle(angle) = { + let a = calc.rem(angle / 1deg, 360) * 1deg + if a < 0deg { a += 360deg } + (a > 60deg and a < 120deg) or (a > 240deg and a < 300deg) +} diff --git a/src/elements/molecule/molecule.typ b/src/elements/molecule/molecule.typ new file mode 100644 index 0000000..2c29662 --- /dev/null +++ b/src/elements/molecule/molecule.typ @@ -0,0 +1,44 @@ +// Molecule parser and transformer module +// +// This module provides a high-level declarative syntax for chemical structures. +// +// Example usage: +// #skeletize(molecule("CH3-CH2-OH")) // Ethanol +// #skeletize(molecule("@6(-=-=-=)")) // Benzene +// +// Supported syntax: +// - Atoms: C, H, O, N, Cl, etc. +// - Bonds: - (single), = (double), # (triple), > < (wedge), :> <: (dashed wedge) +// - Branches: (bond content) e.g., CH3-CH(-OH)-CH3 +// - Rings: @n e.g., @6 for hexagon, @5 for pentagon +// - Labels: :name e.g., CH3:start +// - Charges: ^+ ^- ^2+ ^3- e.g., NH4^+ +// - Isotopes: ^14C, ^235U +// +// Limitations: +// - Maximum nesting depth: ~11 levels due to Typst's recursion limit +// Deeply nested structures like "-(-(-(-(...)))) " beyond 11 levels will fail +// - This is a limitation of the parser combinator approach in Typst +// +#import "parser.typ": alchemist-parser +#import "transformer.typ": transform + +/// Parse and transform a molecule string into alchemist elements. 
+/// +/// - content (string): The molecule string to parse +/// - name (string): Optional name for the molecule group +/// - ..args: Additional arguments (reserved for future use) +/// +/// Returns: Array of alchemist elements or error content +#let molecule(content, name: none, ..args) = { + let parsed = alchemist-parser(content) + if not parsed.success { + // Display error inline + return text(fill: red)[ + Failed to parse "#content": #parsed.error + ] + } + + let reaction = parsed.value + transform(reaction) +} diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ new file mode 100644 index 0000000..4ae67e8 --- /dev/null +++ b/src/elements/molecule/parser.typ @@ -0,0 +1,501 @@ +/* + // reaction syntax + input ::= reaction + reaction ::= term (OPERATOR term)* + term ::= COEFFICIENT? molecule + COEFFICIENT ::= DIGIT+ + + // operator expression + OPERATOR ::= CONDITION? OP_SYMBOL CONDITION? + CONDITION ::= "[" TEXT "]" + OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" | MATH_TEXT // TODO: Unicode is difficult to parse + + // molecule syntax + molecule ::= unit (bond unit)* + unit ::= (node | implicit_node) branch* ring* + node ::= fragment | label + implicit_node ::= ε + + fragment ::= FRAGMENT label? options? + bond ::= BOND_SYMBOL bond_label? options? + BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" + + branch ::= "(" bond molecule ")" + ring ::= "@" DIGIT+ "(" molecule? ")" label? options? + + label ::= ":" IDENTIFIER + bond_label ::= "::" IDENTIFIER + options ::= "(" key_value_pair ("," key_value_pair)* ")" + key_value_pair::= IDENTIFIER ":" value + + // FRAGMENT definition + FRAGMENT ::= ATOMS | ABBREVIATION | MATH_TEXT + ATOMS ::= ATOMS_PART+ CHARGE? + ATOMS_PART ::= ELEMENT_GROUP | PARENTHETICAL | COMPLEX + ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? + ISOTOPE ::= "^" DIGIT+ + ELEMENT ::= [A-Z][a-z]? + SUBSCRIPT ::= DIGIT+ + PARENTHETICAL ::= "(" ATOMS ")" SUBSCRIPT? 
+ COMPLEX ::= "[" ATOMS "]" + CHARGE ::= "^" DIGIT? ("+" | "-") + ABBREVIATION ::= [a-z][A-Za-z]+ + + // Basic tokens + TEXT ::= [^[\]]+ | [^\s\(\)\[\]:,=\-<>#]+ + IDENTIFIER ::= [a-zA-Z_][a-zA-Z0-9_]* + DIGIT ::= [0-9] +*/ + +#import "../../utils/parser-combinator.typ": * +#import "generator.typ": process-atom, calc-main-index + +// ==================== Utilities ==================== + +#let digit = satisfy( + c => c >= "0" and c <= "9", name: "digit" +) +#let integer = map(some(digit), ds => int(ds.join())) +#let letter = satisfy( + c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), name: "letter" +) +#let uppercase = satisfy( + c => c >= "A" and c <= "Z", name: "uppercase" +) +#let lowercase = satisfy( + c => c >= "a" and c <= "z", name: "lowercase" +) +#let alphanum = satisfy( + c => (c >= "0" and c <= "9") or (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), + name: "alphanum" +) +#let identifier = { + seq(choice(letter, char("_")), many(choice(alphanum, char("_"))), map: r => { + let (first, rest) = r + first + rest.join() + }) +} +#let whitespace = one-of(" \t\n\r") +#let ws = many(whitespace) +#let space = one-of(" \t") +#let newline = choice(str("\r\n"), char("\n")) +#let lexeme(p) = seq(p, ws, map: r => r.at(0)) +#let token(s) = lexeme(str(s)) + +// String with escapes +#let string-lit(quote: "\"") = { + let escape = seq(char("\\"), any(), map: r => { + let (_, c) = r + if c == "n" { "\n" } + else if c == "t" { "\t" } + else if c == "r" { "\r" } + else if c == "\\" { "\\" } + else if c == quote { quote } + else { c } + }) + + let normal = none-of(quote + "\\") + let char-parser = choice(escape, normal) + + between(char(quote), char(quote), many(char-parser), map: chars => chars.join()) +} + +// ==================== Labels and Options ==================== + +#let label-parser = seq( + char(":"), identifier, + map: parts => { + let (_, id) = parts + id + } +) + +#let label-ref-parser = seq( + char(":"), identifier, + map: parts => { + let (_, id) = 
parts + (type: "label-ref", label: id) + } +) + +#let bond-label-parser = seq( + str("::"), identifier, + map: parts => { + let (_, id) = parts + id + } +) + +// TODO: Fix this parser to support multiple key-value pairs +// key-value pair (e.g., color: red, angle: 45) +#let key-value-pair-parser = seq( + identifier, token(":"), some(none-of(")")), + map: parts => { + let (id, colon, value) = parts + id + colon + value.join() + } +) + +#let options-parser = seq( + char("("), key-value-pair-parser, char(")"), + map: parts => { + let (_, pairs, _) = parts + (type: "options", pairs: eval("(" + pairs + ")")) + } +) + +// ==================== Fragments ==================== + +// element symbol (e.g., H, Ca, Fe) +#let element-parser = seq( + uppercase, optional(lowercase), + map: parts => { + let (upper, lower) = parts + if lower != none { upper + lower } else { upper } + } +) + +// isotope notation (e.g., ^14, ^235) +#let isotope-parser = seq( + char("^"), integer, + map: parts => { + let (_, num) = parts + num + } +) + +// charge notation (e.g., ^+, ^2-, ^3+) +#let charge-parser = seq( + char("^"), optional(digit), choice(char("+"), char("-")), + map: parts => { + let (_, d, sign) = parts + d + sign + } +) + +#let element-group-parser = seq( + optional(isotope-parser), element-parser, optional(integer), + map: parts => { + let (isotope, element, subscript) = parts + ( + type: "element-group", + isotope: isotope, + element: element, + subscript: subscript + ) + } +) + +// abbreviation (e.g., tBu, iPr) +#let abbreviation-parser = seq( + lowercase, some(letter), + map: parts => { + let (first, rest) = parts + (type: "abbreviation", value: first + rest.join()) + } +) + +// math text notation (e.g., $\\Delta$, $\\mu$) +#let math-text-parser = seq( + char("$"), some(none-of("$")), char("$"), + map: parts => { + let (_, chars, _) = parts + (type: "math-text", value: chars.join()) + } +) + +#let parenthetical-parser(atoms-parser) = seq( + char("("), + atoms-parser, + char(")"), 
+ optional(integer), + map: parts => { + let (_, atoms, _, subscript) = parts + (type: "parenthetical", atoms: atoms, subscript: subscript) + } +) + +// complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+) +#let complex-parser(atoms-parser) = seq( + char("["), + atoms-parser, + char("]"), + map: parts => { + let (_, atoms, _) = parts + (type: "complex", atoms: atoms) + } +) + +#let atoms-part-parser(atoms-parser) = choice( + element-group-parser, + parenthetical-parser(atoms-parser), + complex-parser(atoms-parser) +) + +#let atoms-parser() = { + let self = lazy(() => atoms-parser()) + + seq( + some(atoms-part-parser(self)), optional(charge-parser), + map: parts => { + let (parts, charge) = parts + (type: "atoms", parts: parts, charge: charge) + } + ) +} + +#let fragment-content-parser = choice( + atoms-parser(), + abbreviation-parser, + math-text-parser, +) + + +#let fragment-parser = seq( + fragment-content-parser, optional(label-parser), optional(options-parser), + map: parts => { + let (content, label, options) = parts + ( + type: "fragment", + atoms: process-atom(content), + name: label, + options: if options != none { options.pairs } else { (:) }, + main-index: calc-main-index(content), + ) + } +) + +// ==================== Bonds ==================== + +#let bond-symbol-parser = choice( + str("->"), // Arrow prevention + str("=>"), // Arrow prevention + str(":>"), + str("<:"), + str("|>"), + str("<|"), + char("="), + char("#"), + char("-"), + char(">"), + char("<") +) + +#let bond-parser = seq( + bond-symbol-parser, optional(bond-label-parser), optional(options-parser), + map: parts => { + let (symbol, label, options) = parts + ( + type: "bond", + symbol: symbol, + name: label, + options: if options != none { options.pairs } else { (:) } + ) + } +) + +// ==================== Rings ==================== + +#let ring-size-parser = map( + validate( + some(digit), + digits => { + if digits.len() == 0 { + return (false, "Ring notation (e.g., @6, @5(C-C-C-C-C)) must 
have at least one digit") + } + let num = int(digits.join()) + (num >= 3, "Ring size must be at least 3") + }, + ), + parts => { + int(parts.join()) + } +) + +// ring notation (e.g., @6, @5(C-C-C-C-C)) +#let ring-parser(mol-parser) = seq( + char("@"), ring-size-parser, + optional(seq(char("("), mol-parser, char(")"))), + optional(label-parser), + optional(options-parser), + map: parts => { + let (_, faces, mol, lbl, opts) = parts + ( + type: "cycle", + faces: faces, + body: if mol != none { mol.at(1) } else { none }, + label: lbl, + options: opts + ) + } +) + +// ==================== Molecules ==================== + +#let node-parser(mol-parser) = choice( + fragment-parser, + label-ref-parser +) + +#let branch-parser(mol-parser) = seq( + char("("), bond-parser, mol-parser, char(")"), + map: parts => { + let (_, bond, molecules, _) = parts + (type: "branch", bond: bond, body: molecules) + } +) + +#let unit-parser(mol-parser) = seq( + optional(node-parser(mol-parser)), many(branch-parser(mol-parser)), many(ring-parser(mol-parser)), + map: parts => { + let (node, branches, rings) = parts + + // Handle label reference as a special unit type + if node != none and node.type == "label-ref" { + ( + type: "unit", + node: node, + branches: branches, + rings: rings, + ) + } else { + ( + type: "unit", + node: if node == none { (type: "implicit") } else { node }, + branches: branches, + rings: rings, + ) + } + } +) + +#let molecule-parser() = { + let self = lazy(() => molecule-parser()) + + seq( + unit-parser(self), + many(seq(bond-parser, unit-parser(self))), + map: nodes => { + let (first, rest) = nodes + + ( + type: "molecule", + first: first, + rest: rest.map(unit => { + let (bond, unit) = unit + (bond: bond, unit: unit) + }) + ) + } + ) +} + +// ==================== Reactions ==================== + +#let op-symbol-parser = choice( + str("<=>"), + str("-->"), + str("->"), + str("=>"), + str("⇌"), + str("→"), + str("⇄"), + char("+"), + math-text-parser +) + +// reaction 
condition (e.g., [heat], [catalyst]) +#let condition-parser = seq( + char("["), many(none-of("]")), char("]"), + map: parts => { + let (_, chars, _) = parts + (type: "condition", text: chars.join()) + } +) + +#let operator-parser = seq( + ws, optional(condition-parser), op-symbol-parser, optional(condition-parser), ws, + map: parts => { + let (_, cond1, op, cond2, _) = parts + ( + type: "operator", + condition-before: cond1, + op: op, + condition-after: cond2 + ) + } +) + +#let term-parser = seq( + optional(integer), molecule-parser(), + map: parts => { + let (coeff, mol) = parts + ( + type: "term", + coefficient: coeff, + molecule: mol + ) + } +) + +#let reaction-parser = seq( + term-parser, many(seq(operator-parser, term-parser)), + map: parts => { + let (first, rest) = parts + let terms = (first,) + for (operator, term) in rest { + terms.push(operator) + terms.push(term) + } + ( + type: "reaction", + terms: terms + ) + } +) + +// ==================== Parse Functions ====================
+
+/// Parse a molecule or reaction string with `reaction-parser` and reject
+/// input that is only partially consumed.
+///
+/// - input (str): text to parse
+///
+/// Returns: a dictionary with a `success` flag. An empty string yields an empty
+/// reaction; a failure reported by `parse` is returned unchanged; leftover
+/// input yields `success: false` with an `error` message and the unparsed `rest`.
+#let alchemist-parser(input) = {
+  if input == "" {
+    return (
+      success: true,
+      value: (type: "reaction", terms: ()),
+      rest: input
+    )
+  }
+
+  let reaction_result = parse(reaction-parser, input)
+
+  if not reaction_result.success {
+    return reaction_result
+  }
+
+  if reaction_result.rest != "" {
+    let rest = reaction_result.rest
+    // Build the preview from grapheme clusters. String lengths and `slice`
+    // indices are UTF-8 byte offsets, so cutting at a fixed byte count can land
+    // inside a multi-byte character (e.g. "⇌") and abort with an error.
+    let clusters = rest.clusters()
+    let preview = clusters.slice(0, calc.min(10, clusters.len())).join()
+
+    let first_char = rest.at(0)
+    let error_msg = if first_char >= "0" and first_char <= "9" {
+      "Unexpected number '" + preview + "' - numbers must be part of subscripts, isotopes, or ring sizes"
+    } else if first_char == "&" or first_char == "!" or first_char == "%" {
+      "Invalid character '" + first_char + "' - not a valid bond or atom symbol"
+    } else if first_char == "^" {
+      "Invalid isotope or charge notation starting with '" + preview + "'"
+    } else if first_char == "-" or first_char == "=" or first_char == "#" {
+      "Unexpected bond '" + first_char + "' - bonds must connect atoms"
+    } else {
+      "Unexpected content '" + preview + "' after valid molecule"
+    }
+
+    return (
+      success: false,
+      value: none,
+      // The reported position is a byte offset into `input`.
+      error: error_msg + " (at position " + repr(input.len() - rest.len()) + ")",
+      rest: rest
+    )
+  }
+
+  return reaction_result
+}
diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ new file mode 100644 index 0000000..b177de7 --- /dev/null +++ b/src/elements/molecule/transformer.typ @@ -0,0 +1,333 @@ +#import "iupac-angle.typ": bond-angle, branch-angles, initial-angle, ring-angle, is-vertical-angle +#import "generator.typ": * +#import "../links.typ": single + +#let init_state() = ( + position: (), // Position in the molecule + parent_type: none, // Parent structure type + prev_bond: none, // Previous bond information + next_bond: none, // Next bond information + current_angle: 0deg, // Current absolute angle + visited_labels: (), // Visited labels (prevent circular references) + label_table: (:), // Label table for references +) + +/// Get fragment's main-index (first non-H character index) from a unit +#let get-unit-main-index(unit) = { + if unit == none or unit.node == none or unit.node.type != "fragment" { + return none + } + unit.node.at("main-index", default: 0) +} + +// ============================ Molecule ============================ + +#let transform_fragment(ctx, node) = { + let fragment = generate_fragment(node) + // Register label if present + if node.at("name", default: none) != none { + ctx.label_table.insert(node.name, fragment) + } + (ctx, fragment) +} + +#let transform_bond(ctx, bond, prev_unit: none, next_unit: none) = { + let (ctx, angle) =
bond-angle(ctx, bond) + + // connecting points - merge with bond.options to preserve stroke: none etc. + if ctx.parent_type == "cycle" { + return (ctx, generate_bond(bond, angle, (from: 0, to: 0, ..bond.options))) + } + + // For vertical bonds, connect to main-index (first non-H character) + let options = bond.options + + if is-vertical-angle(angle) { + let prev_main = get-unit-main-index(prev_unit) + let next_main = get-unit-main-index(next_unit) + if prev_main != none { + options = (from: prev_main, ..options) + } + if next_main != none { + options = (to: next_main, ..options) + } + } + + (ctx, generate_bond(bond, angle, options)) +} + +#let transform_branch(ctx, branch, transform_molecule_fn, parent_unit: none) = { + // Get first unit of branch body for vertical bond detection + let first_unit = if branch.body != none and branch.body.type == "molecule" { + branch.body.first + } else { none } + let (ctx, bond) = transform_bond(ctx, branch.bond, prev_unit: parent_unit, next_unit: first_unit) + let branch_ctx = ctx + (parent_type: "unit") + let (branch_ctx, body) = transform_molecule_fn(branch_ctx, branch.body) + // Merge label_table back to parent context + ctx.label_table = branch_ctx.label_table + (ctx, generate_branch(bond, body)) +} + +/// Find positions of units that have inner rings +#let find-inner-ring-positions(mol) = { + if mol.type != "molecule" { return () } + let positions = () + if mol.first != none and mol.first.rings.len() > 0 { + positions.push(0) + } + for (idx, item) in mol.rest.enumerate() { + if item.unit != none and item.unit.rings.len() > 0 { + positions.push(idx + 1) + } + } + positions +} + +/// Move rings from last unit to second-to-last unit (for hetero case) +#let move-rings-to-earlier-position(mol) = { + let rest = mol.rest + if rest.len() < 2 { return mol } + + let last_unit = rest.last().unit + if last_unit == none or last_unit.rings.len() == 0 { return mol } + + let second_last_unit = rest.at(-2).unit + let merged_rings = if 
second_last_unit != none and second_last_unit.rings != none { + (..second_last_unit.rings, ..last_unit.rings) + } else { + last_unit.rings + } + + let new_second_last_unit = if second_last_unit != none { + (..second_last_unit, rings: merged_rings) + } else { + (type: "unit", node: (type: "implicit"), branches: (), rings: merged_rings) + } + + let new_rest = rest.slice(0, -2) + new_rest.push((..rest.at(-2), unit: new_second_last_unit)) + new_rest.push((..rest.at(-1), unit: (..last_unit, rings: ()))) + + (..mol, rest: new_rest) +} + +/// Insert invisible bonds at specified positions (for polycyclic case) +#let insert-invisible-bonds(mol, positions) = { + let new_rest = mol.rest + let invisible_entry = ( + bond: (type: "bond", symbol: "-", name: none, options: (stroke: none)), + unit: (type: "unit", node: (type: "implicit"), branches: (), rings: ()) + ) + for idx in positions.rev() { + new_rest = (..new_rest.slice(0, idx), invisible_entry, ..new_rest.slice(idx)) + } + (..mol, rest: new_rest) +} + +#let transform_cycle(ctx, cycle, transform_molecule_fn, angle: none, absolute: false) = { + let (body, cycle_ctx) = if cycle.body == none { + (range(cycle.faces).map(i => single()).join(), ctx) + } else { + let outer_body_len = if cycle.body.rest != none { cycle.body.rest.len() } else { 0 } + let cycle_ctx = ctx + ( + parent_type: "cycle", + position: ctx.position + ((cycle.faces, 0),), + outer_cycle_body_len: outer_body_len, + outer_cycle_faces: cycle.faces, + ) + + let inner_ring_positions = find-inner-ring-positions(cycle.body) + let bonds_needed = cycle.faces - outer_body_len + + let modified_body = if bonds_needed == 0 and inner_ring_positions.len() > 0 { + // Hetero: move rings earlier so drawer processes them before face-count limit + move-rings-to-earlier-position(cycle.body) + } else if bonds_needed > 0 and inner_ring_positions.len() > 0 { + // Polycyclic: insert invisible bonds + let count = calc.min(bonds_needed, inner_ring_positions.len()) + 
insert-invisible-bonds(cycle.body, inner_ring_positions.slice(0, count)) + } else { + cycle.body + } + + let (cycle_ctx, transformed) = transform_molecule_fn(cycle_ctx, modified_body) + (transformed, cycle_ctx) + } + + // Merge label_table back to parent context + ctx.label_table = cycle_ctx.label_table + + let hetero = () + if type(body) == array and body.len() > 0 { + if body.at(0).type == "fragment" { + hetero.push(body.at(0)) + body = body.slice(1) + } + if body.len() > 0 and body.last().type == "fragment" { + hetero.push(body.last()) + body = body.slice(0, -1) + } + } + + // Build cycle dict with angle in args (inline to preserve angle without changing generator.typ) + let cycle_dict = ( + type: "cycle", + faces: cycle.faces, + body: body, + args: if angle != none { + if absolute { (absolute: angle) } else { (relative: angle) } + } else { (:) }, + ) + + // All elements stay in body - drawer handles cycles/branches after links naturally + (ctx, (..hetero, cycle_dict)) +}
+
+/// Transform one parsed unit into elements: its node first, then its
+/// branches, then its rings.
+///
+/// - ctx (dictionary): transformer state; its `label_table` is updated from
+///   the node, branches and rings processed here
+/// - unit (dictionary, none): parsed unit with `node`, `branches` and `rings`
+/// - transform_molecule_fn (function): molecule transformer handed on to
+///   transform_branch and transform_cycle
+///
+/// Returns: `(ctx, elements)`; `elements` is empty when `unit` is none.
+#let transform_unit(ctx, unit, transform_molecule_fn) = {
+  if unit == none { return (ctx, ()) }
+
+  // Process the node
+  let node = unit.node
+  let (ctx, generated) = if node != none {
+    if node.type == "fragment" {
+      transform_fragment(ctx, node)
+    } else if node.type == "label-ref" {
+      // NOTE(review): `generate_label_reference` is not among the definitions of
+      // generator.typ shown in this diff and is not imported by name here; a
+      // label reference would fail unless it is defined elsewhere — confirm.
+      (ctx, generate_label_reference(node))
+    } else if node.type == "implicit" {
+      // Implicit node, no action needed
+      (ctx, ())
+    } else {
+      panic("Unknown node type: " + node.type + " for node: " + repr(node))
+    }
+  } else {
+    (ctx, ())
+  }
+
+  // Process branches
+  let angles = branch-angles(ctx, unit.branches)
+  let branches = ()
+  // NOTE(review): zip pairs only up to the shorter array, so a branch without a
+  // computed angle would be skipped — confirm branch-angles always returns one
+  // angle per branch.
+  for ((idx, branch), angle) in unit.branches.enumerate().zip(angles) {
+    // Each branch starts from the unit's angle plus its own offset.
+    let branch_ctx = ctx + (
+      parent_type: "branch",
+      position: ctx.position + ((unit.branches.len(), idx),),
+      current_angle: ctx.current_angle + angle,
+    )
+    let (branch_ctx, branch_result) = transform_branch(
+      branch_ctx,
+      branch,
+      transform_molecule_fn,
+      parent_unit: unit
+    )
+    // Merge label_table back
+    ctx.label_table = branch_ctx.label_table
+    branches.push(branch_result)
+  }
+
+  // Process rings
+  let rings = ()
+  for (idx, ring) in unit.rings.enumerate() {
+    let (angle, absolute) = ring-angle(ctx, ring, unit.rings.len(), idx)
+    // When ring-angle yields no angle, the ring keeps the unit's current angle.
+    let ring_ctx = ctx + (
+      parent_type: "cycle",
+      position: ctx.position + ((unit.rings.len(), idx),),
+      current_angle: if angle != none { angle } else { ctx.current_angle },
+    )
+    let (ring_ctx, ring_result) = transform_cycle(
+      ring_ctx,
+      ring,
+      transform_molecule_fn,
+      angle: angle,
+      absolute: absolute,
+    )
+    // Merge label_table back
+    ctx.label_table = ring_ctx.label_table
+
+    rings.push(ring_result)
+  }
+
+  // Each branch/ring result is itself an array; join flattens them in order.
+  (ctx, (..generated, ..branches.join(), ..rings.join()))
+}
+
+#let transform_molecule(ctx, molecule) = { + if molecule == none or molecule.type != "molecule" { return (ctx, ()) } + + let chain_length = molecule.rest.len() + let position = ctx.position + // Preserve current_angle when inside a branch (parent_type == "unit") + // Reset to initial-angle for top-level and cycle body + let base_angle = if ctx.parent_type == "unit" { + ctx.current_angle + } else { + initial-angle(ctx, molecule) + } + ctx += ( + current_angle: base_angle, + prev_bond: none, + next_bond: if 0 < chain_length { molecule.rest.at(0).bond } else { none }, + position: position + ((chain_length, 0),) + ) + + // Transform first unit + let (ctx, first) = transform_unit( + ctx, + molecule.first, + transform_molecule + ) + + // Transform rest of chain + let rest = () + let prev_unit = molecule.first + if molecule.rest != none and chain_length > 0 { + for (idx, item) in molecule.rest.enumerate() { + let rest_ctx = ctx + ( + prev_bond: ctx.next_bond, + next_bond: if idx + 1 < chain_length { molecule.rest.at(idx + 1).bond } else { none }, + position: position + ((chain_length, idx + 1),), + ) + + let (rest_ctx, bond) = transform_bond(rest_ctx, item.bond, prev_unit: prev_unit, next_unit: item.unit) + let (rest_ctx, unit) = transform_unit(rest_ctx, item.unit,
transform_molecule) + ctx = rest_ctx + prev_unit = item.unit + + rest += (..bond, ..unit) + } + } + + (ctx, (..first, ..rest)) +} + +// ============================ Reaction ============================ + +#let transform_term(ctx, molecule) = { + transform_molecule(ctx + (parent_type: none), molecule) +} + +#let transform_operator(ctx, operator) = { + (ctx, generate_operator(operator)) +} + +#let transform_reaction(ctx, reaction) = { + let result = () + for term in reaction.terms { + if term.type == "term" { + let (ctx_new, transformed) = transform_term(ctx, term.molecule) + ctx = ctx_new + result.push(transformed) + } else if term.type == "operator" { + let (ctx_new, transformed) = transform_operator(ctx, term) + ctx = ctx_new + result.push((transformed,)) + } else { + panic("Unknown term type: " + term.type) + } + } + (ctx, result) +} + +#let transform(reaction) = { + let ctx = init_state() + let (_, result) = transform_reaction(ctx, reaction) + result.join() +} diff --git a/src/utils/parser-combinator.typ b/src/utils/parser-combinator.typ new file mode 100644 index 0000000..5e67adb --- /dev/null +++ b/src/utils/parser-combinator.typ @@ -0,0 +1,355 @@ +// Parse state +#let state(input, pos: 0) = ( + input: input, + pos: pos, + len: input.len(), + at: self => if self.pos < self.len { self.input.at(self.pos) } else { none }, + peek: (self, n: 1) => { + if self.pos + n <= self.len { + self.input.slice(self.pos, self.pos + n) + } else { + none + } + }, + advance: (self, n: 1) => state(self.input, pos: self.pos + n), + remaining: self => self.input.slice(self.pos), + is-eof: self => self.pos >= self.len, +) + +// Result types +#let ok(value, state) = (ok: true, value: value, state: state) +#let err(msg, state) = (ok: false, error: msg, state: state) + +// Parser type +#let parser(name, fn) = (name: name, run: fn) + +// ==================== Basic Parsers ==================== + +// Match any character +#let any() = parser("any", s => { + let c = (s.at)(s) + if c != none 
{
+    ok(c, (s.advance)(s))
+  } else {
+    err("end of input", s)
+  }
+})
+
+// Match specific character
+#let char(c) = parser("char(" + c + ")", s => {
+  let ch = (s.at)(s)
+  if ch == c {
+    ok(c, (s.advance)(s))
+  } else {
+    err("expected " + c, s)
+  }
+})
+
+// Match string
+#let str(text) = parser("str(" + text + ")", s => {
+  let peek = (s.peek)(s, n: text.len()) // none when fewer than text.len() units remain
+  if peek == text {
+    ok(text, (s.advance)(s, n: text.len()))
+  } else {
+    err("expected " + text, s)
+  }
+})
+
+// Match one of characters
+#let one-of(chars) = parser("one-of", s => {
+  let c = (s.at)(s)
+  if c != none and chars.contains(c) {
+    ok(c, (s.advance)(s))
+  } else {
+    err("expected one of " + chars, s)
+  }
+})
+
+// Match none of characters
+#let none-of(chars) = parser("none-of", s => {
+  let c = (s.at)(s)
+  if c != none and not chars.contains(c) {
+    ok(c, (s.advance)(s))
+  } else {
+    err("unexpected " + repr(c), s)
+  }
+})
+
+// Match with predicate
+#let satisfy(pred, name: "satisfy") = parser(name, s => {
+  let c = (s.at)(s)
+  if c != none and pred(c) {
+    ok(c, (s.advance)(s))
+  } else {
+    err(name + " failed", s)
+  }
+})
+
+// Match end of input
+#let eof() = parser("eof", s => {
+  if (s.is-eof)(s) {
+    ok(none, s)
+  } else {
+    err("expected end of input", s)
+  }
+})
+
+// ==================== Combinators ====================
+
+// Map result: apply `f` to the value of a successful parse; failures pass through unchanged
+#let map(p, f) = parser("map", s => {
+  let r = (p.run)(s) // map
+  if r.ok {
+    ok(f(r.value), r.state)
+  } else {
+    r
+  }
+})
+
+// Sequence parsers (variadic): run each in order, then pass the array of values through `map`
+#let seq(..parsers, map: results => results) = {
+  let ps = parsers.pos()
+  if ps.len() == 0 { return parser("empty", s => ok((), s)) }
+  if ps.len() == 1 { return ps.at(0) } // NOTE(review): this shortcut returns the parser as-is; `map` is not applied and the value is not wrapped in an array
+
+  parser("seq", s => {
+    let results = ()
+    let current = s
+
+    for p in ps {
+      let r = (p.run)(current) // seq
+      if not r.ok { return r } // first failure aborts the whole sequence
+      results.push(r.value)
+      current = r.state
+    }
+
+    ok(map(results), current)
+  })
+}
+
+// Choice (variadic): first alternative that succeeds wins; every alternative starts from the same state
+#let choice(..parsers) = {
+  let ps = parsers.pos()
+  if ps.len() == 0 { panic("choice requires at least one parser") }
+  if ps.len() == 1 { return ps.at(0) }
+
+  parser("choice", s => {
+    for p in ps {
+      let r = (p.run)(s) // choice
+      if r.ok { return r }
+    }
+    err("no alternative matched", s)
+  })
+}
+
+// Optional: never fails; yields none without consuming when `p` fails
+#let optional(p) = parser("optional", s => {
+  let r = (p.run)(s) // optional
+  if r.ok {
+    ok(r.value, r.state)
+  } else {
+    ok(none, s)
+  }
+})
+
+// Optional with default value
+#let optional-default(p, default) = map(
+  optional(p),
+  v => if v != none { v } else { default }
+)
+
+// Zero or more: never fails; collects values until `p` fails or stops making progress
+#let many(p) = parser("many", s => {
+  let results = ()
+  let current = s
+
+  while true {
+    let r = (p.run)(current) // many
+    if not r.ok or r.state.pos == current.pos { break } // a success that consumed nothing would repeat forever
+    results.push(r.value)
+    current = r.state
+  }
+
+  ok(results, current)
+})
+
+// One or more
+#let some(p) = parser("some", s => {
+  let first = (p.run)(s) // some
+  if not first.ok { return first }
+
+  let rest = (many(p).run)(first.state)
+  ok((first.value,) + rest.value, rest.state)
+})
+
+// Between delimiters: yields only the value of `p`
+#let between(left, right, p) = parser("between", s => {
+  let l = (left.run)(s) // between
+  if not l.ok { return l }
+
+  let m = (p.run)(l.state)
+  if not m.ok { return m }
+
+  let r = (right.run)(m.state)
+  if not r.ok { return r }
+
+  ok(m.value, r.state)
+})
+
+// Separated by: zero or more `p` with `separator` between them; never fails
+#let sep-by(p, separator) = parser("sep-by", s => {
+  let first = (p.run)(s) // sep-by
+  if not first.ok { return ok((), s) }
+
+  let results = (first.value,)
+  let current = first.state
+
+  while true {
+    let sep = (separator.run)(current) // sep-by
+    if not sep.ok { break }
+
+    let item = (p.run)(sep.state)
+    if not item.ok or item.state.pos == current.pos { break } // a trailing separator is left unconsumed; no progress would loop forever
+
+    results.push(item.value)
+    current = item.state
+  }
+
+  ok(results, current)
+})
+
+// Validate parsed value: `validator` returns a (valid, error-msg) pair
+#let validate(p, validator) = parser("validate", s => {
+  let result = (p.run)(s) // validate
+  if not result.ok { return result }
+
+  let (valid, error-msg) = validator(result.value)
+  if not valid {
+    return err(error-msg, s)
+  }
+
+
ok(result.value, result.state)
+})
+
+// Separated by (at least one): parses `p (separator p)*`; fails if the first `p` fails
+#let sep-by1(p, separator) = parser("sep-by1", s => {
+  let first = (p.run)(s) // sep-by1
+  if not first.ok { return first } // propagate the first item's error unchanged
+
+  // After the first item, every further item must be preceded by a separator.
+  // (Running sep-by here would look for another `p` first, so "a,b" stopped at "a".)
+  // A separator that is not followed by `p` fails the pair and is left unconsumed,
+  // which matches how sep-by treats a trailing separator.
+  let rest = (many(seq(separator, p, map: pair => pair.at(1))).run)(first.state)
+  ok((first.value,) + rest.value, rest.state)
+})
+
+// Count exact: run `p` exactly `n` times
+#let count(n, p) = parser("count", s => {
+  let results = ()
+  let current = s
+
+  for i in range(n) {
+    let r = (p.run)(current) // count
+    if not r.ok { return err("expected " + repr(n) + " items, got " + repr(i), current) }
+    results.push(r.value)
+    current = r.state
+  }
+
+  ok(results, current)
+})
+
+// Lookahead - check without consuming
+#let lookahead(p) = parser("lookahead", s => {
+  let r = (p.run)(s) // lookahead
+  if r.ok {
+    ok(r.value, s) // Don't advance
+  } else {
+    r
+  }
+})
+
+// Negative lookahead: succeeds (with none, consuming nothing) only when `p` fails
+#let not-ahead(p) = parser("not", s => {
+  let r = (p.run)(s) // not
+  if r.ok {
+    err("unexpected " + repr(r.value), s)
+  } else {
+    ok(none, s)
+  }
+})
+
+// Attempt - backtrack on failure (here it simply runs `p`; the caller still holds the original state `s`)
+#let attempt(p) = parser("attempt", s => {
+  (p.run)(s) // attempt
+})
+
+// Label for better errors: wraps a failure of `p` in a message naming `lbl` and previewing the input
+#let label(p, lbl) = parser(lbl, s => {
+  let r = (p.run)(s) // label
+  if not r.ok {
+    // Create a more descriptive error message
+    let context_str = if s.pos < s.len {
+      let preview = (s.peek)(s, n: calc.min(10, s.len - s.pos)) // at most 10 units of upcoming input
+      " at '" + preview + "'"
+    } else {
+      " at end of input"
+    }
+    err("Expected " + lbl + context_str + " (got: " + r.error + ")", s)
+  } else {
+    r
+  }
+})
+
+// Chain left - for left-associative operators (`op` yields a two-argument function)
+#let chainl(p, op, default: none) = parser("chainl", s => {
+  let first = (p.run)(s) // chainl
+  if not first.ok {
+    if default != none {
+      return ok(default, s)
+    }
+    return first
+  }
+
+  let acc = first.value
+  let current = first.state
+
+  while true {
+    let o = (op.run)(current) // chainl
+    if not o.ok { break }
+
+    let next = (p.run)(o.state) // chainl
+    if not next.ok or next.state.pos == current.pos { break } // also stop when op + p consumed nothing (would loop forever)
+
+    acc = (o.value)(acc, next.value)
+    current = next.state
+  }
+
+  ok(acc, current)
+})
+
+// Lazy parser - defers evaluation until needed
+#let lazy(thunk) = parser("lazy", s => {
+  let p = thunk() // lazy
+  (p.run)(s) // lazy
+})
+
+
+// Run parser: returns a dictionary with success, value, error and the unconsumed rest of the input
+#let parse(p, input) = {
+  let s = state(input)
+  let r = (p.run)(s) // parse
+  (
+    success: r.ok,
+    value: if r.ok { r.value } else { none },
+    error: if not r.ok {
+      let pos_info = if r.state.pos > 0 {
+        " (at position " + repr(r.state.pos) + ")"
+      } else {
+        ""
+      }
+      r.error + pos_info
+    } else { none },
+    rest: (r.state.remaining)(r.state),
+  )
+}
diff --git a/tests/cetz-skeleton-anchors/test.typ b/tests/cetz-skeleton-anchors/test.typ
index 0465c00..e4e5b63 100644
--- a/tests/cetz-skeleton-anchors/test.typ
+++ b/tests/cetz-skeleton-anchors/test.typ
@@ -5,17 +5,17 @@ #set page(width: auto, height: auto, margin: 0.5em) #let molecule-R1 = draw-skeleton(name: "mol1", mol-anchor: "east", { - molecule(name: "A", "H_2N") + fragment(name: "A", "H_2N") single() - molecule(name: "B", "CH") + fragment(name: "B", "CH") branch({ single(angle: 6) - molecule( + fragment( "R_1", ) }) single() - molecule(name: "cooh", "COOH") + fragment(name: "cooh", "COOH") }) #canvas({
diff --git a/tests/molecule-edge-cases/.gitignore b/tests/molecule-edge-cases/.gitignore
new file mode 100644
index 0000000..40223be
--- /dev/null
+++ b/tests/molecule-edge-cases/.gitignore
@@ -0,0 +1,4 @@
+# generated by tytanic, do not edit
+
+diff/**
+out/**
diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ
new file mode 100644
index 0000000..4eb53c2
--- /dev/null
+++ b/tests/molecule-edge-cases/test.typ
@@ -0,0 +1,187 @@
+#import "../../lib.typ": *
+#import "../../src/elements/molecule/parser.typ": alchemist-parser
+#import "../../src/elements/molecule/transformer.typ": transform
+#import "../../src/elements/molecule/molecule.typ": molecule
+
+// Error handling and edge cases test
+= 
Molecule Edge Cases and Error Handling Tests
+
+#let test-parse(input, description) = { // parse `input`; render the result, or a red error message when parsing fails
+  let parsed = alchemist-parser(input) // .success, .value and .error are read below
+  if not parsed.success {
+    return [
+      == #description
+      #text(fill: red)[
+        Failed to parse "#input": #parsed.error
+      ]
+    ]
+  }
+
+  let reaction = parsed.value
+  let result = transform(reaction) // element list handed to skeletize below
+
+  [
+    == #description
+    ✓ Input: #input
+    #skeletize(result)
+    #linebreak()
+    Parsed successfully with #parsed.value.terms.len() nodes
+    // #repr(reaction)
+    #linebreak()
+    // #repr(result)
+    // #linebreak()
+  ]
+}
+
+= Parser edge cases
+// Empty input (case currently disabled)
+// #test-parse("", "Empty input")
+
+// Whitespace only
+#test-parse(" ", "Whitespace only")
+
+// Single atom
+#test-parse("C", "Single atom")
+#test-parse("H", "Single hydrogen")
+#test-parse("Cl", "Single chlorine")
+#test-parse("C123H456", "Very large numbers")
+
+// Bond only (no atom): one case per bond symbol mapped in generate_bond
+#test-parse("-", "Bond only")
+#test-parse("=", "Double bond only")
+#test-parse("#", "Triple bond only")
+#test-parse(">", "Arrow bond only")
+#test-parse("<", "Arrow bond only")
+#test-parse(":>", "Arrow bond only")
+#test-parse("<:", "Arrow bond only")
+#test-parse("|>", "Arrow bond only")
+#test-parse("<|", "Arrow bond only")
+
+// Incomplete bond
+#test-parse("CH3-", "Trailing bond")
+#test-parse("-CH3", "Leading bond")
+#test-parse("CH3--CH3", "Double dash")
+#test-parse("CH3-A(-CH3)(-CH3)-CH3", "Multiple branches")
+
+// Invalid parenthesis
+#test-parse("CH3(", "Unclosed parenthesis")
+#test-parse("CH3)", "Extra closing parenthesis")
+#test-parse("CH3(-OH", "Unclosed branch")
+#test-parse("CH3-OH)", "Extra closing in chain")
+
+// Deeply nested structure: level k wraps -OH in k nested branches
+#test-parse("-(-OH)", "Deeply nested (1 levels)")
+#test-parse("-(-(-OH))", "Deeply nested (2 levels)")
+#test-parse("-(-(-(-OH)))", "Deeply nested (3 levels)")
+#test-parse("-(-(-(-(-OH))))", "Deeply nested (4 levels)")
+#test-parse("-(-(-(-(-(-OH)))))", "Deeply nested (5 levels)")
+#test-parse("-(-(-(-(-(-(-OH))))))", "Deeply nested (6 levels)")
+#test-parse("-(-(-(-(-(-(-(-OH)))))))", "Deeply nested (7 levels)")
+#test-parse("-(-(-(-(-(-(-(-(-OH))))))))", "Deeply nested (8 levels)")
+#test-parse("-(-(-(-(-(-(-(-(-(-OH)))))))))", "Deeply nested (9 levels)")
+#test-parse("-(-(-(-(-(-(-(-(-(-(-OH))))))))))", "Deeply nested (10 levels)")
+#test-parse("-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))", "Deeply nested (11 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))", "Deeply nested (12 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))", "Deeply nested (13 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))", "Deeply nested (14 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))", "Deeply nested (15 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))", "Deeply nested (16 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))))", "Deeply nested (17 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))))", "Deeply nested (18 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))))))", "Deeply nested (19 levels)")
+// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))))))", "Deeply nested (20 levels)")
+
+#let many-branches = "C(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)"
+#test-parse(many-branches, "10 branches on single carbon")
+
+// Complex branching patterns
+#test-parse("C()()()()", "Empty branches")
+#test-parse("C(-CH3)()(-OH)", "Mixed empty and filled branches")
+#test-parse("C(-)(-)(=)", "Branches with only bonds")
+
+// #test-parse("$CH_3$-CH2-OH", "Typst math notation")
+
+// Long chain
+#let long-chain = "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-OH"
+#test-parse(long-chain, "Very long chain (50 CH2 units)")
+
+= Ring structure edge cases
+
+#test-parse("@1", "1-membered ring (invalid)")
+#test-parse("@2", "2-membered ring (invalid)")
+#test-parse("@3", "3-membered ring")
+#test-parse("@4", "4-membered ring")
+#test-parse("@5", "5-membered ring")
+#test-parse("@6", "6-membered ring")
+#test-parse("@7", "7-membered ring")
+#test-parse("@8", "8-membered ring")
+#test-parse("@10", "10-membered ring")
+#test-parse("@15", "15-membered ring")
+#test-parse("@20", "20-membered ring")
+
+// Ring bond patterns
+#test-parse("@6(------)", "Ring with explicit single bonds")
+#test-parse("@6(=-=-=-)", "Benzene with alternating bonds")
+#test-parse("@6(-AB-AB-AB-AB-AB-AB)", "connecting points")
+#test-parse("@6((-)-(-)-(-)-(-)-(-)-(-)-)", "branches in cycle")
+#test-parse("@5((-)-(-)-(-)-(-)-(-)-)", "branches in cycle")
+#test-parse("@6((-)(-)-(-)(-)-(-)(-)-(-)(-)-(-)(-)-(-)(-)-)", "2 branches in each edge in cycle")
+#test-parse("@6((-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-)", "branches in branches in cycle")
+#test-parse("@6((-)-(-)-(-)-(-)-(-)-)", "branches in cycle")
+
+// Ring substituents
+#test-parse("CH3-@6", "Methyl attached to ring")
+#test-parse("@6-CH3", "Ring attached to methyl")
+#test-parse("@6(-CH3)", "Ring with adjacent substituents")
+#test-parse("CH3-@6-CH3", "Ring in middle of chain")
+#test-parse("CH3-CH2-@6-CH2-CH3", "Ring embedded in chain")
+#test-parse("@6-@5", "Different sized rings connected")
+#test-parse("@6--@5", "Different sized rings connected")
+#test-parse("@6-(-@6)-(-@6)-@6", "Rings connected by methylene")
+#test-parse("@3-(-@3)-(-@3)-@3", "Rings connected by methylene")
+
+// Invalid ring notation (expected parse error); the ring marker is "@" even though the first description says "Asterisk"
+#test-parse("@", "Asterisk without size")
+#test-parse("@0", "Zero-sized ring")
+#test-parse("@-1", "Negative ring size")
+#test-parse("@a", "Non-numeric ring size")
+#test-parse("@6.5", "Decimal ring size")
+
+// Ring with empty parentheses
+// #test-parse("@6()", "Ring with empty parentheses")
+#test-parse("@6(())", "Ring with nested empty parentheses")
+// #test-parse("@6(CH3)", "Ring with atom in parentheses (invalid)")
+#test-parse("@6(-)", "Ring with only bond")
+// #test-parse("@6((-))", "Ring with parenthesized bond")
+#test-parse("@6(-=-=-(-O-CH3)=)", "Ring with carboxyl group")
+
+// Label special cases (all currently disabled)
+// #test-parse("CH3:", "Label without name")
+// #test-parse("CH3::", "Double colon")
+// #test-parse("CH3:label1:label2", "Multiple labels")
+// #test-parse(":labelonly", "Label without atom")
+
+// Number processing (no cases yet)
+
+= Conversion edge cases
+// Circular reference possibility
+// Ring structure nested test is omitted
+
+// Interchangeable bond patterns
+// Complex bond patterns are omitted
+
+// All bond types
+// Complex bond types are omitted
+
+= Actually chemically invalid structures
+
+= Maximum case
+#test-parse("CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2OH", "Long atom string")
+#test-parse("CH3:verylonglabelnamethatshouldstillwork", "Long label name")
+
+= Unicode and special characters
+
+// All cases in this section are currently disabled.
+// #test-parse("CH₃-CH₂-OH", "Unicode subscripts")
+// #test-parse("CH³⁺", "Unicode superscript charge")
+// #test-parse("CH3–CH2–OH", "En dash bonds")
+// #test-parse("CH3−CH2−OH", "Minus sign bonds")
diff --git a/tests/molecule-integration/.gitignore b/tests/molecule-integration/.gitignore
new file mode 100644
index 0000000..40223be
--- /dev/null
+++ b/tests/molecule-integration/.gitignore
@@ -0,0 +1,4 @@
+# generated by tytanic, do not edit
+
+diff/**
+out/**
diff --git a/tests/molecule-integration/test.typ b/tests/molecule-integration/test.typ
new file mode 100644
index 0000000..0824136
--- /dev/null
+++ b/tests/molecule-integration/test.typ
@@ -0,0 +1,125 @@
+#import "../../lib.typ": *
+#import "../../src/elements/molecule/molecule.typ": molecule
+
+= Molecule Integration Tests
+
+== Organic Compounds
+
+=== Ethanol
+#skeletize(molecule("CH3-CH2-OH"))
+
+=== Isopropanol
+#skeletize(molecule("CH3-CH(-OH)-CH3"))
+
+=== Acetone
+#skeletize(molecule("CH3-C(=O)-CH3"))
+
+=== Acetic Acid
+#skeletize(molecule("CH3-C(=O)-OH"))
+
+=== Benzene Ring Structure
+#skeletize(molecule("@6(-=-=-=)"))
+
+== Amino Acids
+
+=== Glycine
+#skeletize(molecule("NH2-CH2-C(=O)-OH"))
+
+=== Alanine
+#skeletize(molecule("NH2-CH(-CH3)-C(=O)-OH"))
+
+=== Serine
+#skeletize(molecule("NH2-CH(-CH2(-OH))-C(=O)-OH"))
+
+== Sugars
+
+=== Linear Glucose
+#skeletize(molecule("CHO-CH(-OH)-CH(-OH)-CH(-OH)-CH(-OH)-CH2OH"))
+
+=== Linear Fructose
+#skeletize(molecule("CH2OH-C(=O)-CH(-OH)-CH(-OH)-CH(-OH)-CH2OH"))
+
+== Fatty Acids
+
+=== Butyric Acid
+#skeletize(molecule("CH3-CH2-CH2-C(=O)-OH"))
+
+=== Palmitic Acid
+#skeletize(molecule("CH3-(CH2)14-C(=O)-OH"))
+
+== Complex Branching Structures
+
+=== tert-Butyl Alcohol
+#skeletize(molecule("C(-CH3)(-CH3)(-CH3)-OH"))
+
+=== Neopentane
+#skeletize(molecule("C(-CH3)(-CH3)(-CH3)-CH3"))
+
+=== Complex Branching Alcohol
+#skeletize(molecule("CH3-C(-CH3)(-CH2(-OH))-CH2-CH3"))
+
+== Unsaturated Compounds
+
+=== Ethylene
+#skeletize(molecule("CH2=CH2"))
+
+=== Acetylene
+#skeletize(molecule("HC#CH"))
+
+=== Butadiene
+#skeletize(molecule("CH2=CH-CH=CH2"))
+
+=== Acrylic Acid
+#skeletize(molecule("CH2=CH-C(=O)-OH"))
+
+== Cyclic Compounds
+
+=== Cyclohexane
+#skeletize(molecule("@6(------)"))
+
+=== Cyclohexanol
+#skeletize(molecule("@6(-----(-OH)-)"))
+
+=== Methylcyclohexane
+#skeletize(molecule("@6(------)-CH3"))
+
+=== 1,4-Dimethylcyclohexane
+#skeletize(molecule("@6((-CH3)---(-CH3)---)"))
+
+== Labeled Structures
+
+=== Reaction Site Marking
+#skeletize(molecule("CH3:start-CH2-CH2-OH:end"))
+
+=== Substituent Identification
+#skeletize(molecule("CH3-CH:carbon(-OH:hydroxyl)-CH3"))
+
+== Stereochemistry
+
+=== Wedge Bond (Stereochemistry)
+#skeletize(molecule("CH3CH3")) // NOTE(review): this input contains no wedge symbol ("<" or ">") — confirm the intended string
+
+=== Dashed Wedge Bond
+#skeletize(molecule("CH3<|CH(-OH)|>CH3")) // NOTE(review): generate_bond maps "<|" and "|>" to the hollow cram bonds; the dashed ones are "<:" and ":>"
+
+== Polymers
+
+=== Polyethylene Unit
+// #skeletize(molecule("(-CH2-CH2-)n"))
+
+=== Polystyrene Unit
+// #skeletize(molecule("(-CH2-CH(-@6)-)n"))
+
+== Complex Natural Compounds (Simplified)
+
+=== Caffeine Skeleton (Simplified)
+#skeletize(molecule("@6((=O)-N(-)-@5(-N=-N(-)-)=-(=O)-N(-)-)"))
+#skeletize(molecule("@6((=O)-N(-)-@5(-N=-N(-)-=)-(=O)-N(-)-)"))
+
+== Pharmaceutical Skeleton (Simplified)
+
+=== Aspirin
+#skeletize(molecule("@6(-=-(-O-(=O)-CH3)=(-(=O)-OH)-=)"))
+
+=== Paracetamol
+#skeletize(molecule("@6((-OH)-=-(-NH-(=O)-CH3)=-=)"))
diff --git a/tests/molecule-ring/.gitignore b/tests/molecule-ring/.gitignore
new file mode 100644
index 0000000..40223be
--- /dev/null
+++ b/tests/molecule-ring/.gitignore
@@ -0,0 +1,4 @@
+# generated by tytanic, do not edit
+
+diff/**
+out/**
diff --git a/tests/molecule-ring/test.typ b/tests/molecule-ring/test.typ
new file mode 100644
index 0000000..47daeff
--- /dev/null
+++ b/tests/molecule-ring/test.typ
@@ -0,0 +1,49 @@
+#import "../../lib.typ": *
+#import "../../src/elements/molecule/parser.typ": alchemist-parser
+#import "../../src/elements/molecule/transformer.typ": transform
+#import "../../src/elements/molecule/molecule.typ": molecule
+
+// Ring parsing cases. NOTE(review): the title below says "Edge Cases and Error Handling" although every case here is a ring
+= Molecule Edge Cases and Error Handling Tests
+
+#let test-parse(input, description) = { // parse `input`; render the result, or a red error message when parsing fails
+  let parsed = alchemist-parser(input) // .success, .value and .error are read below
+  if not parsed.success {
+    return [
+      == #description
+      #text(fill: red)[
+        Failed to parse "#input": #parsed.error
+      ]
+    ]
+  }
+
+  let reaction = parsed.value
+  let result = transform(reaction) // element list handed to skeletize below
+
+  [
+    == #description
+    ✓ Input: #input
+    #skeletize(result)
+    #linebreak()
+    Parsed successfully with #parsed.value.terms.len() nodes
+    // #repr(reaction)
+    #linebreak()
+    // #repr(result)
+    // #linebreak()
+  ]
+}
+
+= Parser edge cases
+#test-parse("@6((-)-(-)-(-)-(-)-(-)-(-)-)", "ring")
+#test-parse("@6((-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-)", "ring")
+#test-parse("@6(-(-CH3)(-CH3)-----)", "ring")
+#test-parse("@6(-(-CH3)-----)", "ring")
+#test-parse("CH3-@6-CH3", "ring")
+#test-parse("@6-CH3", "ring")
+#test-parse("CH3-@6", "ring")
+#test-parse("@6(-----@6(-----))", "fused ring (5+5)")
+#test-parse("@6(------@6(-----))", "fused ring (6+5)") +#test-parse("@6(-----@6(------))", "fused ring (5+6)") +#test-parse("@6(------@6(------))", "hetero ring (6+6)") + + \ No newline at end of file diff --git a/tests/resonance/test.typ b/tests/resonance/test.typ index bc87a6a..16e3a20 100644 --- a/tests/resonance/test.typ +++ b/tests/resonance/test.typ @@ -8,14 +8,14 @@ config: (angle-increment: 15deg), { import cetz.draw: * - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("E") + fragment("E") }) branch({ double(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -24,7 +24,7 @@ ) }) single(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: -45deg), @@ -33,7 +33,7 @@ name: "to", ) single(angle: 2, name: "from") - molecule("H", name: "H") + fragment("H", name: "H") hobby( stroke: (red), (to: "from", rel: (0, 3pt)), @@ -42,7 +42,7 @@ mark: (end: ">", fill: red), ) plus(atom-sep: 5em) - molecule( + fragment( "B", lewis: ( lewis-double(angle: 180deg), @@ -64,14 +64,14 @@ r: "]", l: "[", { - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ double(angle: 6, name: "double") - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -82,7 +82,7 @@ }) branch({ single(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -102,14 +102,14 @@ operator(math.stretch(sym.arrow.r.l, size: 2em)) - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ single(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -120,7 +120,7 @@ }) branch({ single(angle: -2, name: "single") - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -140,14 +140,14 @@ operator(math.stretch(sym.arrow.r.l, size: 2em)) - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ single(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -158,7 +158,7 
@@ }) branch({ double(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: -135deg),