From 59f5e4b0d6ce2e0ebf4b9355e6e4734bb2780864 Mon Sep 17 00:00:00 2001
From: anko9801 <37263451+anko9801@users.noreply.github.com>
Date: Mon, 8 Sep 2025 12:37:47 +0900
Subject: [PATCH 01/30] feat: initial implement for molecule

---
 lib.typ                      |   1 +
 src/molecule/iupac-angle.typ | 364 ++++++++++++++++++
 src/molecule/molecule.typ    |  13 +
 src/molecule/parser.typ      | 718 +++++++++++++++++++++++++++++++++++
 src/molecule/transformer.typ | 408 ++++++++++++++++++++
 5 files changed, 1504 insertions(+)
 create mode 100644 src/molecule/iupac-angle.typ
 create mode 100644 src/molecule/molecule.typ
 create mode 100644 src/molecule/parser.typ
 create mode 100644 src/molecule/transformer.typ

diff --git a/lib.typ b/lib.typ
index 7d1f5c1..88471b2 100644
--- a/lib.typ
+++ b/lib.typ
@@ -1,6 +1,7 @@
 #import "@preview/cetz:0.4.1"
 #import "src/default.typ": default
 #import "src/utils/utils.typ"
+#import "src/molecule/molecule.typ": molecule
 #import "src/drawer.typ"
 #import "src/drawer.typ": skeletize, draw-skeleton, skeletize-config, draw-skeleton-config
 #import "src/elements/links.typ": *
diff --git a/src/molecule/iupac-angle.typ b/src/molecule/iupac-angle.typ
new file mode 100644
index 0000000..5b52312
--- /dev/null
+++ b/src/molecule/iupac-angle.typ
@@ -0,0 +1,364 @@
+// IUPAC-compliant molecular structure angle calculation module
+// Assigns appropriate angles based on vertex bonding states according to IUPAC rules
+
+// NOTE(review): lib.typ imports cetz 0.4.1 while this module pins 0.2.2, and nothing in
+// this file references the binding - confirm whether the versions should be aligned.
+#import "@preview/cetz:0.2.2"
+
+// Basic angle definitions (relative angles)
+#let ANGLE-STRAIGHT = 0deg // Straight line
+#let ANGLE-REVERSE = 180deg // Reverse
+#let ANGLE-UP = 60deg // Upward (for zigzag pattern)
+#let ANGLE-DOWN = -60deg // Downward (for zigzag pattern)
+#let MAIN-CHAIN-INITIAL-ANGLE = 30deg // Main chain initial angle
+#let BRANCH-ANGLE-STEP = 60deg // Angle step between branches
+#let FULL-CIRCLE = 360deg // Full circle angle
+
+// Determine up/down for zigzag pattern: even index -> ANGLE-UP, odd index -> ANGLE-DOWN
+#let get-zigzag-angle(index) = {
+  if calc.rem(index, 2) == 0 { ANGLE-UP } else { ANGLE-DOWN }
+}
+
+// Determine node hybridization state from every edge touching the node.
+// Returns "sp" (any triple bond, or exactly two double bonds), "sp2" (exactly one
+// double bond) or "sp3" (everything else). Edges whose data is none are ignored.
+#let determine-hybridization(node-id, graph) = {
+  let node-edges = graph.edges.filter(e => e.from == node-id or e.to == node-id)
+
+  let double-bond-count = 0
+  for edge in node-edges {
+    // Triple bond indicates sp hybridization
+    if edge.data != none and edge.data.bondType == "triple" {
+      return "sp"
+    }
+    // Count double bonds
+    if edge.data != none and edge.data.bondType == "double" {
+      double-bond-count += 1
+    }
+  }
+
+  // Determine hybridization state
+  return if double-bond-count == 2 {
+    "sp"
+  } else if double-bond-count == 1 {
+    "sp2"
+  } else {
+    "sp3"
+  }
+}
+
+// Calculate main chain angle (relative angle)
+// Zigzag pattern: up→down→up...
+#let calculate-main-chain-angle(edge-index, hybridization, is-first-edge, main-chain-length) = {
+  // sp hybridization means straight line
+  if hybridization == "sp" {
+    return ANGLE-STRAIGHT
+  }
+
+  // Process first edge
+  if is-first-edge {
+    if main-chain-length >= 2 {
+      return MAIN-CHAIN-INITIAL-ANGLE
+    } else {
+      return ANGLE-STRAIGHT
+    }
+  }
+
+  // Zigzag pattern (based on edge index)
+  return get-zigzag-angle(edge-index)
+}
+
+// Calculate branch angles (relative angles)
+// Equally spaced in appropriate range based on situation.
+// Returns one angle per entry of `branch-edges`, in the same order. `node-id`, `graph`,
+// `is-root-node` and `main-chain-length` are accepted but not read here.
+#let calculate-branch-angles(node-id, graph, edge-index, is-root-node, main-chain-length, branch-edges, has-incoming-main, has-outgoing-main) = {
+  let branch-angles = ()
+  let total-branches = branch-edges.len()
+
+  if total-branches == 0 { return branch-angles }
+
+  // Receive connection status from upstream
+  let has-incoming = has-incoming-main
+  let has-outgoing = has-outgoing-main
+
+  // Determine split range and direction (simplified)
+  let center-angle = if not has-outgoing {
+    ANGLE-STRAIGHT
+  } else if not has-incoming {
+    ANGLE-REVERSE
+  } else {
+    // Reverse direction of zigzag pattern
+    - get-zigzag-angle(edge-index)
+  }
+
+  // Case of single branch
+  if total-branches == 1 {
+    branch-angles.push(center-angle)
+    return branch-angles
+  }
+
+  // Calculate angle step for multiple branches
+  let step-angle = if has-incoming and has-outgoing {
+    BRANCH-ANGLE-STEP / (total-branches - 1)
+  } else {
+    // A present main-chain bond takes one slot around the atom as well. Count it with
+    // an explicit integer instead of relying on `integer + none` from an else-less `if`.
+    let n = total-branches + int(has-incoming) + int(has-outgoing)
+    FULL-CIRCLE / n
+  }
+
+  // Place branch angles at equal intervals
+  let start-angle = center-angle - step-angle * (total-branches - 1) / 2
+  for i in range(total-branches) {
+    branch-angles.push(start-angle + i * step-angle)
+  }
+
+  return branch-angles
+}
+
+// Calculate angles for all edges from node.
+// Returns a dictionary keyed "<from>-><to>" holding the relative angle of every outgoing
+// edge. `node-info` may carry pre-computed `edges` / `hybridization` for the node; when it
+// is none or lacks a value, that value is derived from `graph`.
+#let calculate-edge-angles(node-id, graph, edge-index, is-root-node, main-chain-length, node-info: none) = {
+  // The default for `node-info` is none, which has no `.at`; fall back to an empty dictionary
+  let info = if node-info == none { (:) } else { node-info }
+
+  // Test for the key instead of passing `default:` - arguments are evaluated eagerly, so a
+  // default would scan every edge even when the cached list is present
+  let edges = if "edges" in info {
+    info.edges
+  } else {
+    graph.edges.filter(e => e.from == node-id)
+  }
+
+  if edges.len() == 0 {
+    return (:)
+  }
+
+  // A cached entry may still hold `hybridization: none`; compute it in that case
+  let cached-hybridization = info.at("hybridization", default: none)
+  let hybridization = if cached-hybridization != none {
+    cached-hybridization
+  } else {
+    determine-hybridization(node-id, graph)
+  }
+
+  // Classify edges by role and record whether a main edge leaves this node
+  let main-edges = ()
+  let branch-edges = ()
+  let has-incoming-main = false
+  let has-outgoing-main = false
+
+  for edge in edges {
+    let role = edge.data.at("role", default: "main")
+    if role == "branch" {
+      branch-edges.push(edge)
+    } else {
+      main-edges.push(edge)
+      has-outgoing-main = true
+    }
+  }
+
+  // Check incoming main edges (only consulted when there are branches to place)
+  if branch-edges.len() > 0 {
+    for edge in graph.edges {
+      if edge.to == node-id and edge.data.at("role", default: "main") == "main" {
+        has-incoming-main = true
+        break
+      }
+    }
+  }
+
+  // Dictionary to store angles
+  let angles = (:)
+  for (index, edge) in main-edges.enumerate() {
+    let is-first-edge = is-root-node and index == 0
+    let angle = calculate-main-chain-angle(edge-index + index, hybridization, is-first-edge, main-chain-length)
+    let key = str(edge.from) + "->" + str(edge.to)
+    angles.insert(key, angle)
+  }
+
+  // Calculate branch edge angles
+  if branch-edges.len() > 0 {
+    let branch-angles-list = calculate-branch-angles(
+      node-id, graph, edge-index, is-root-node, main-chain-length,
+      branch-edges, has-incoming-main, has-outgoing-main
+    )
+    for (index, edge) in branch-edges.enumerate() {
+      let angle = branch-angles-list.at(index)
+      let key = str(edge.from) + "->" + str(edge.to)
+      angles.insert(key, angle)
+    }
+  }
+
+  return angles
+}
+
+// Calculate main chain length: the number of "main" edges followed from `start-node`,
+// always taking the first main edge and stopping at a dead end or a revisited node
+#let count-main-chain-edges(graph, start-node) = {
+  let visited = ()
+  let count = 0
+  let current-node = start-node
+
+  while current-node != none {
+    if current-node in visited { break }
+    visited.push(current-node)
+
+    let main-edges = graph.edges.filter(e =>
+      e.from == current-node and
+      e.data.at("role", default: "main") == "main"
+    )
+
+    if main-edges.len() > 0 {
+      count += 1
+      current-node = main-edges.at(0).to
+    } else {
+      break
+    }
+  }
+
+  return count
+}
+
+// Traverse graph and calculate angles.
+// Returns `(angles, edge-index)`: the merged angle dictionary and the main-chain index
+// reached. Only edges whose role is "main" or "branch" are followed.
+#let traverse-and-calculate(graph, node-id, visited, edge-index, angles, is-root, node-cache: none) = {
+  if node-id in visited { return (angles, edge-index) }
+  visited.push(node-id)
+
+  let main-chain-length = if is-root {
+    count-main-chain-edges(graph, node-id)
+  } else {
+    0
+  }
+
+  // Get information from cache (empty dictionary if not present)
+  let node-info = if node-cache != none { node-cache.at(node-id, default: (:)) } else { (:) }
+
+  let node-angles = calculate-edge-angles(
+    node-id, graph, edge-index, is-root, main-chain-length,
+    node-info: node-info
+  )
+
+  // Merge angles
+  angles = angles + node-angles
+
+  // Get edges from node-info; filter the graph only when they are not cached
+  let edges = if "edges" in node-info {
+    node-info.edges
+  } else {
+    graph.edges.filter(e => e.from == node-id)
+  }
+
+  let next-edge-index = edge-index
+
+  for edge in edges {
+    if edge.to not in visited {
+      let role = edge.data.at("role", default: "main")
+
+      if role == "main" {
+        // For main chain, advance the index
+        next-edge-index = next-edge-index + 1
+        let (new-angles, new-index) = traverse-and-calculate(graph, edge.to, visited, next-edge-index, angles, false, node-cache: node-cache)
+        angles = new-angles
+        next-edge-index = new-index
+      } else if role == "branch" {
+        // For branches, start with new index
+        let (new-angles, _) = traverse-and-calculate(graph, edge.to, visited, 0, angles, false, node-cache: node-cache)
+        angles = new-angles
+      }
+    }
+  }
+
+  return (angles, next-edge-index)
+}
+
+// ===== Main Functions =====
+
+// Calculate angles for entire graph.
+// Returns a dictionary keyed "<from>-><to>", or an empty dictionary when no root exists.
+#let calculate-all-angles(graph) = {
+  // Use the declared root when there is one
+  let root = graph.at("root", default: none)
+  if root == none and graph.nodes.len() > 0 {
+    // Prioritize searching for node_0
+    root = if "node_0" in graph.nodes { "node_0" } else { graph.nodes.keys().first() }
+  }
+
+  if root == none { return (:) }
+
+  // Pre-build node info cache (performance optimization for large graphs)
+  let node-cache = (:)
+  if graph.edges.len() > 50 { // Use cache only for large graphs
+    for edge in graph.edges {
+      let from-node = edge.from
+      if from-node not in node-cache {
+        node-cache.insert(from-node, (edges: (), hybridization: none))
+      }
+      node-cache.at(from-node).edges.push(edge)
+    }
+    // Pre-calculate hybridization state for each node
+    for (node-id, _) in graph.nodes {
+      // Create entries for all nodes when building node-cache
+      if node-id not in node-cache {
+        node-cache.insert(node-id, (edges: (), hybridization: none))
+      }
+      node-cache.at(node-id).hybridization = determine-hybridization(node-id, graph)
+    }
+  }
+
+  let visited = ()
+  let angles = (:)
+  let (final-angles, _) = traverse-and-calculate(
+    graph, root, visited, 0, angles, true,
+    node-cache: if node-cache.len() > 0 { node-cache } else { none }
+  )
+
+  return final-angles
+}
+
+// Calculate ring rotation angle (using relative angles only).
+// `node-id` must name a node whose `data.size` holds the ring size.
+#let calculate-ring-rotation(node-id, graph, angles) = {
+  let outgoing-edges = graph.edges.filter(e =>
+    e.from == node-id and e.data.at("role", default: "main") == "main"
+  )
+  let incoming-edges = graph.edges.filter(e =>
+    e.to == node-id and e.data.at("role", default: "main") == "main"
+  )
+
+  let node = graph.nodes.at(node-id)
+  let ring-size = node.data.size
+  let has-incoming = incoming-edges.len() > 0
+  let has-outgoing = outgoing-edges.len() > 0
+  let base-adjustment = 180deg / ring-size - 90deg
+
+  if has-incoming and not has-outgoing {
+    return ANGLE-STRAIGHT + base-adjustment
+  }
+
+  // Angle of the first outgoing main edge. The filter above already guarantees
+  // `edge.from == node-id`, so no per-edge check is needed.
+  let outgoing-angle = if has-outgoing {
+    let first-edge = outgoing-edges.first()
+    angles.at(str(first-edge.from) + "->" + str(first-edge.to), default: 0deg)
+  } else {
+    0deg
+  }
+
+  if not has-incoming and has-outgoing {
+    return ANGLE-REVERSE + outgoing-angle + base-adjustment
+  }
+
+  if has-incoming and has-outgoing {
+    // Determine ring orientation (reverse of zigzag pattern)
+    let inverse-angle = if outgoing-angle == ANGLE-UP { ANGLE-DOWN } else { ANGLE-UP }
+    return inverse-angle + base-adjustment
+  }
+
+  return base-adjustment
+}
diff --git a/src/molecule/molecule.typ b/src/molecule/molecule.typ
new file mode 100644
index 0000000..7fc463d
--- /dev/null
+++ b/src/molecule/molecule.typ
@@ -0,0 +1,13 @@
+#import "parser.typ": parse-molecule
+#import "transformer.typ": transform
+
+// Parse a molecule description and convert the resulting graph into drawable elements.
+// NOTE(review): parse-molecule only starts a fresh parse when given a string, and
+// `name` / `..args` are accepted here but not forwarded anywhere yet.
+#let molecule(content, name: none, ..args) = {
+  let graph = parse-molecule(content)
+
+  let elements = transform(graph)
+
+  elements
+}
diff --git a/src/molecule/parser.typ b/src/molecule/parser.typ
new file mode 100644
index 0000000..6d006c1
--- /dev/null
+++ b/src/molecule/parser.typ
@@ -0,0 +1,718 @@
+/*
+  molecule      ::= node? (bond_node_pair)*
+
+  bond_node_pair::= bond node?
+  node          ::= (fragment | ring) branch*
+
+  fragment      ::= ATOM_STRING label? options?
+  bond          ::= BOND_SYMBOL bond_label? options?
+  branch        ::= "(" bond molecule ")"
+  ring          ::= "*" DIGIT+ "(" molecule? ")" label? options?
+ + label ::= ":" IDENTIFIER + bond_label ::= "::" IDENTIFIER + options ::= "(" key_value_pair ("," key_value_pair)* ")" + key_value_pair::= IDENTIFIER ":" value + value ::= STRING | NUMBER | IDENTIFIER +*/ + +#let create-parser-context(input, config: (:)) = { + let mainInput = input + let remoteConnections = () + + if type(input) == content { + let lines = input.text.split("\n").filter(line => line.trim() != "") + if lines.len() > 0 { + mainInput = lines.at(0) + remoteConnections = lines.slice(1) + } + } else if type(input) == str { + mainInput = input + } else { + mainInput = str(input) + } + + ( + input: mainInput, + position: 0, + length: mainInput.len(), + graph: ( + nodes: (:), + edges: (), + nodeCounter: 0, + edgeCounter: 0, + root: none, + labels: (:), + bondLabels: (:), + ), + lastNodeId: none, + config: config, + remoteConnections: remoteConnections, + ) +} + +// Create sub-context (for parsing ring content) +#let create-sub-context(parent-ctx) = { + ( + input: parent-ctx.input, + position: parent-ctx.position, + length: parent-ctx.length, + graph: ( + nodes: (:), + edges: (), + nodeCounter: 0, + edgeCounter: 0, + root: none, + labels: (:), + bondLabels: (:), + ), + lastNodeId: none, + config: parent-ctx.config, + remoteConnections: (), + ) +} + +#let create-node(nodeType: "fragment", data: (:)) = { + ( + id: none, + type: nodeType, + data: data, + ) +} + +#let create-edge(fromId, toId, edgeType: "bond", data: (:)) = { + ( + id: none, + from: fromId, + to: toId, + type: edgeType, + data: data, + ) +} + +#let add-node-to-graph(ctx, node) = { + let nodeId = "node_" + str(ctx.graph.nodeCounter) + node.id = nodeId + ctx.graph.nodeCounter += 1 + ctx.graph.nodes.insert(nodeId, node) + + if "label" in node.data and node.data.label != none { + ctx.graph.labels.insert(node.data.label, nodeId) + } + + if ctx.graph.root == none { + ctx.graph.root = nodeId + } + + return (nodeId, ctx) +} + +#let add-edge-to-graph(ctx, edge) = { + let edgeId = "edge_" + 
str(ctx.graph.edgeCounter) + edge.id = edgeId + ctx.graph.edgeCounter += 1 + ctx.graph.edges.push(edge) + + if "label" in edge.data and edge.data.label != none { + ctx.graph.bondLabels.insert(edge.data.label, edgeId) + } + + return ctx +} + +#let peek-char(ctx) = { + if ctx.position >= ctx.length { return none } + ctx.input.at(ctx.position) +} + +#let peek-string(ctx, length) = { + let end = calc.min(ctx.position + length, ctx.length) + ctx.input.slice(ctx.position, end) +} + +#let advance(ctx, count: 1) = { + ctx.position += count + ctx +} + +#let skip-whitespace(ctx) = { + while ctx.position < ctx.length { + let char = peek-char(ctx) + if char != " " and char != "\t" { break } + ctx = advance(ctx) + } + ctx +} + +#let ATOM_STRING_PATTERN = regex("^([A-Z][a-z]?(\d+)?)+(_[^\s\(\)\[\]:,=\-<>#]+|\^[^\s\(\)\[\]:,=\-<>#]+)*") +#let IDENTIFIER_PATTERN = regex("^[a-zA-Z_][a-zA-Z0-9_]*") +#let DIGIT_PATTERN = regex("^\d+") + +#let parse-identifier(ctx) = { + let remaining = ctx.input.slice(ctx.position) + let match = remaining.match(IDENTIFIER_PATTERN) + + if match != none and match.start == 0 { + ctx.position += match.text.len() + return (match.text, ctx) + } + + return (none, ctx) +} + +#let parse-digits(ctx) = { + let remaining = ctx.input.slice(ctx.position) + let match = remaining.match(DIGIT_PATTERN) + + if match != none and match.start == 0 { + ctx.position += match.text.len() + return (match.text, ctx) + } + + return (none, ctx) +} + +#let parse-value(ctx) = { + ctx = skip-whitespace(ctx) + + if peek-char(ctx) == "\"" { + ctx = advance(ctx) + let start = ctx.position + while ctx.position < ctx.length and peek-char(ctx) != "\"" { + ctx = advance(ctx) + } + let value = ctx.input.slice(start, ctx.position) + if peek-char(ctx) == "\"" { + ctx = advance(ctx) + } + return (value, ctx) + } + + let (ident, newCtx) = parse-identifier(ctx) + if ident != none { + return (ident, newCtx) + } + + let start = ctx.position + let parenDepth = 0 + while ctx.position < ctx.length { 
+ let char = peek-char(ctx) + if parenDepth == 0 and (char == "," or char == ")") { break } + if char == "(" { parenDepth += 1 } + if char == ")" { parenDepth -= 1 } + ctx = advance(ctx) + } + + let value = ctx.input.slice(start, ctx.position).trim() + return (value, ctx) +} + +#let parse-options(ctx) = { + if peek-char(ctx) != "(" { return (none, ctx) } + ctx = advance(ctx) + + let options = (:) + + while ctx.position < ctx.length { + ctx = skip-whitespace(ctx) + + if peek-char(ctx) == ")" { + ctx = advance(ctx) + break + } + + let (key, newCtx) = parse-identifier(ctx) + if key == none { break } + ctx = newCtx + + ctx = skip-whitespace(ctx) + if peek-char(ctx) != ":" { break } + ctx = advance(ctx) + + let (value, newCtx2) = parse-value(ctx) + ctx = newCtx2 + options.insert(key, value) + + ctx = skip-whitespace(ctx) + if peek-char(ctx) == "," { + ctx = advance(ctx) + } + } + + return (options, ctx) +} + +#let parse-label(ctx) = { + if peek-char(ctx) != ":" { return (none, ctx) } + if peek-string(ctx, 2) == "::" { return (none, ctx) } + + ctx = advance(ctx) + return parse-identifier(ctx) +} + +#let parse-bond-label(ctx) = { + if peek-string(ctx, 2) != "::" { return (none, ctx) } + + ctx = advance(ctx, count: 2) + return parse-identifier(ctx) +} + +#let parse-fragment(ctx) = { + ctx = skip-whitespace(ctx) + + let remaining = ctx.input.slice(ctx.position) + let atomMatch = remaining.match(ATOM_STRING_PATTERN) + + if atomMatch == none or atomMatch.start != 0 { + return (none, ctx) + } + + let atom = atomMatch.text + ctx.position += atom.len() + + ctx = skip-whitespace(ctx) + let (label, newCtx) = parse-label(ctx) + if label != none { + ctx = newCtx + } + + ctx = skip-whitespace(ctx) + let options = (:) + + let node = create-node( + nodeType: "fragment", + data: (atom: atom, label: label, options: options) + ) + + return (node, ctx) +} + +// Parser functions that need mutual recursion +#let parse-ring(ctx, parse-mol-fn) = { + ctx = skip-whitespace(ctx) + + if 
peek-char(ctx) != "*" { return (none, ctx) } + ctx = advance(ctx) + + let (sizeStr, newCtx) = parse-digits(ctx) + if sizeStr == none { return (none, ctx) } + ctx = newCtx + let size = int(sizeStr) + + // Optional content within parentheses + let ringContent = none + ctx = skip-whitespace(ctx) + if peek-char(ctx) == "(" { + ctx = advance(ctx) + + // Create sub-context for ring content + let sub-ctx = create-sub-context(ctx) + + // Parse in sub-context (into independent graph) + let (innerMol, newSubCtx) = parse-mol-fn(sub-ctx, parse-mol-fn: parse-mol-fn) + if innerMol != none { + // Save complete graph structure (not just metadata) + ringContent = newSubCtx.graph + // Update parent context position (up to closing parenthesis) + ctx.position = newSubCtx.position + } + + ctx = skip-whitespace(ctx) + if peek-char(ctx) != ")" { return (none, ctx) } + ctx = advance(ctx) // Consume closing parenthesis + } + + ctx = skip-whitespace(ctx) + let (label, newCtx3) = parse-label(ctx) + if label != none { + ctx = newCtx3 + } + + ctx = skip-whitespace(ctx) + let options = (:) + if peek-char(ctx) == "(" { + let (opts, newCtx4) = parse-options(ctx) + if opts != none { + options = opts + ctx = newCtx4 + } + } + + let node = create-node( + nodeType: "ring", + data: (size: size, content: ringContent, label: label, options: options) + ) + + return (node, ctx) +} + +#let parse-bond(ctx) = { + ctx = skip-whitespace(ctx) + + let bondType = none + let twoChar = peek-string(ctx, 2) + + if twoChar == ":>" { + bondType = "wedge-dashed-right" + ctx = advance(ctx, count: 2) + } else if twoChar == "<:" { + bondType = "wedge-dashed-left" + ctx = advance(ctx, count: 2) + } else { + let char = peek-char(ctx) + if char == "-" { + bondType = "single" + ctx = advance(ctx) + } else if char == "=" { + bondType = "double" + ctx = advance(ctx) + } else if char == "#" { + bondType = "triple" + ctx = advance(ctx) + } else if char == ">" { + bondType = "wedge-filled-right" + ctx = advance(ctx) + } else if 
char == "<" { + bondType = "wedge-filled-left" + ctx = advance(ctx) + } + } + + if bondType == none { return (none, ctx) } + + ctx = skip-whitespace(ctx) + let (bondLabel, newCtx) = parse-bond-label(ctx) + if bondLabel != none { + ctx = newCtx + } + + ctx = skip-whitespace(ctx) + let options = (:) + if peek-char(ctx) == "(" { + let saved = ctx.position + ctx = advance(ctx) + ctx = skip-whitespace(ctx) + let char = peek-char(ctx) + let twoChar2 = peek-string(ctx, 2) + let isBond = char == "-" or char == "=" or char == "#" or char == ">" or char == "<" or twoChar2 == ":>" or twoChar2 == "<:" + + ctx.position = saved + if not isBond { + let (opts, newCtx2) = parse-options(ctx) + if opts != none { + options = opts + ctx = newCtx2 + } + } + } + + return ((bondType: bondType, label: bondLabel, options: options), ctx) +} + +#let parse-branch(ctx, parentId, parse-mol-fn) = { + if peek-char(ctx) != "(" { return ((), ctx) } + + let saved = ctx.position + ctx = advance(ctx) + ctx = skip-whitespace(ctx) + + let (bond, newCtx) = parse-bond(ctx) + if bond == none { + ctx.position = saved + return ((), ctx) + } + ctx = newCtx + + let savedLastNode = ctx.lastNodeId + ctx.lastNodeId = none + + let (branchMol, newCtx2) = parse-mol-fn(ctx, parse-mol-fn: parse-mol-fn) + ctx = newCtx2 + + ctx.lastNodeId = savedLastNode + + ctx = skip-whitespace(ctx) + if peek-char(ctx) != ")" { + ctx.position = saved + return ((), ctx) + } + ctx = advance(ctx) + + // If branch has no atoms (only bonds), create implicit node + if (branchMol == none or branchMol.root == none) and bond != none and parentId != none { + // Create implicit node + let implicitNode = create-node( + nodeType: "implicit", + data: (atom: none, label: none, options: (:)) + ) + let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) + ctx = ctx3 + + // Create edge from parent node to implicit node + let edge = create-edge( + parentId, + implicitId, + edgeType: "bond", + data: ( + bondType: bond.bondType, + label: bond.label, 
+ options: bond.options, + role: "branch" + ) + ) + ctx = add-edge-to-graph(ctx, edge) + } else if branchMol != none and branchMol.root != none and parentId != none { + // branchMol.root is the node ID + let edge = create-edge( + parentId, + branchMol.root, + edgeType: "bond", + data: ( + bondType: bond.bondType, + label: bond.label, + options: bond.options, + role: "branch" + ) + ) + ctx = add-edge-to-graph(ctx, edge) + } + + return ((bond: bond, molecule: branchMol), ctx) +} + +#let parse-node(ctx, parse-mol-fn) = { + ctx = skip-whitespace(ctx) + + let node = none + let nodeId = none + + let (ringNode, newCtx) = parse-ring(ctx, parse-mol-fn) + if ringNode != none { + let (id, ctx2) = add-node-to-graph(newCtx, ringNode) + nodeId = id + ctx = ctx2 + } else { + let (fragmentNode, newCtx2) = parse-fragment(ctx) + if fragmentNode != none { + let (id, ctx3) = add-node-to-graph(newCtx2, fragmentNode) + nodeId = id + ctx = ctx3 + } + } + + if nodeId == none { return (none, ctx) } + + let branches = () + while true { + let (branch, newCtx) = parse-branch(ctx, nodeId, parse-mol-fn) + if branch == () { break } + branches.push(branch) + ctx = newCtx + } + + return (nodeId, ctx) +} + +#let parse-bond-node-pair(ctx, parse-mol-fn) = { + ctx = skip-whitespace(ctx) + + let (bond, newCtx) = parse-bond(ctx) + if bond == none { return (none, ctx) } + ctx = newCtx + + let (nodeId, newCtx2) = parse-node(ctx, parse-mol-fn) + ctx = newCtx2 + + if nodeId == none { + // Check if there's a branch instead of a node + if peek-char(ctx) == "(" { + // Create an implicit node to attach the branch to + let implicitNode = create-node( + nodeType: "implicit", + data: (atom: none, label: none, options: (:)) + ) + let (id, ctx3) = add-node-to-graph(ctx, implicitNode) + nodeId = id + ctx = ctx3 + + // Parse branches attached to this implicit node + while peek-char(ctx) == "(" { + let (branch, branchCtx) = parse-branch(ctx, nodeId, parse-mol-fn) + if branch == () { break } + ctx = branchCtx + } + } 
else { + // Create a simple implicit node + let implicitNode = create-node( + nodeType: "implicit", + data: (atom: none, label: none, options: (:)) + ) + let (id, ctx3) = add-node-to-graph(ctx, implicitNode) + nodeId = id + ctx = ctx3 + } + } + + return ((bond: bond, nodeId: nodeId), ctx) +} + +#let process-remote-connections(ctx) = { + for connection in ctx.remoteConnections { + let parts = connection.split("=") + if parts.len() != 2 { continue } + + let fromPart = parts.at(0).trim() + let toPart = parts.at(1).trim() + + if fromPart.starts-with(":") and toPart.starts-with(":") { + let fromLabel = fromPart.slice(1) + let toLabel = toPart.slice(1) + + let options = (:) + let parenIdx = toLabel.position("(") + if parenIdx != none { + toLabel = toLabel.slice(0, parenIdx) + } + + let fromId = ctx.graph.labels.at(fromLabel, default: none) + let toId = ctx.graph.labels.at(toLabel, default: none) + + if fromId != none and toId != none { + let edge = create-edge( + fromId, + toId, + edgeType: "bond", + data: ( + bondType: "double", + role: "remote", + options: options + ) + ) + ctx = add-edge-to-graph(ctx, edge) + } + } + } + return ctx +} + +// Unified parse-molecule function +#let parse-molecule(inputOrCtx, config: (:), parse-mol-fn: none) = { + // Set parse-mol-fn to self if not provided + if parse-mol-fn == none { + parse-mol-fn = parse-molecule + } + + // Determine if this is an initial call (with string input) or recursive call (with context) + let ctx = if type(inputOrCtx) == str { + // Initial call with string input + create-parser-context(inputOrCtx, config: config) + } else { + // Recursive call with context + inputOrCtx + } + + let initialNodeCount = ctx.graph.nodeCounter + let initialEdgeCount = ctx.graph.edgeCounter + let localRoot = none + + let (firstNodeId, newCtx) = parse-node(ctx, parse-mol-fn) + if firstNodeId != none { + ctx = newCtx + ctx.lastNodeId = firstNodeId + localRoot = firstNodeId + if ctx.graph.root == none { + ctx.graph.root = firstNodeId + } 
+ } else { + // Check if input starts with branch ( + if peek-char(ctx) == "(" { + // Create implicit node for branch-starting input + let implicitNode = create-node( + nodeType: "implicit", + data: (atom: none, label: none, options: (:)) + ) + let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) + ctx = ctx3 + ctx.lastNodeId = implicitId + localRoot = implicitId + if ctx.graph.root == none { + ctx.graph.root = implicitId + } + + // Parse branches attached to this implicit node + while peek-char(ctx) == "(" { + let (branch, branchCtx) = parse-branch(ctx, implicitId, parse-mol-fn) + if branch == () { break } + ctx = branchCtx + } + } else { + let savedPos = ctx.position + let (testBond, testCtx) = parse-bond(ctx) + ctx.position = savedPos + + if testBond != none { + let implicitNode = create-node( + nodeType: "implicit", + data: (atom: none, label: none, options: (:)) + ) + let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) + ctx = ctx3 + ctx.lastNodeId = implicitId + localRoot = implicitId + if ctx.graph.root == none { + ctx.graph.root = implicitId + } + } + } + } + + while ctx.position < ctx.length { + ctx = skip-whitespace(ctx) + + if peek-char(ctx) == ")" { break } + + let (pair, newCtx2) = parse-bond-node-pair(ctx, parse-mol-fn) + if pair == none { break } + ctx = newCtx2 + + if ctx.lastNodeId != none and pair.nodeId != none { + let edge = create-edge( + ctx.lastNodeId, + pair.nodeId, + edgeType: "bond", + data: ( + bondType: pair.bond.bondType, + label: pair.bond.label, + options: pair.bond.options, + role: "main" + ) + ) + ctx = add-edge-to-graph(ctx, edge) + } + + if localRoot == none { + localRoot = pair.nodeId + } + + ctx.lastNodeId = pair.nodeId + } + + // Return different results based on whether this is initial or recursive call + if type(inputOrCtx) == str { + // Initial call - process remote connections and return the graph + if ctx.remoteConnections.len() > 0 { + ctx = process-remote-connections(ctx) + } + return ctx.graph + } 
else { + // Recursive call - return molecule info and context + let molecule = ( + nodes: ctx.graph.nodes.pairs().filter(p => { + let nodeNum = int(p.at(0).slice(5)) + nodeNum >= initialNodeCount + }).len(), + root: localRoot + ) + return (molecule, ctx) + } +} \ No newline at end of file diff --git a/src/molecule/transformer.typ b/src/molecule/transformer.typ new file mode 100644 index 0000000..7667b33 --- /dev/null +++ b/src/molecule/transformer.typ @@ -0,0 +1,408 @@ +#import "../elements/links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left + +#import "iupac-angle.typ": * + +// Inline branch function to avoid circular reference +#let create-branch(body, args: (:)) = { + (( + type: "branch", + body: body, + args: args + ),) +} + +// Inline cycle function to avoid circular reference +#let create-cycle(faces, body, args: (:)) = { + (( + type: "cycle", + faces: faces, + body: body, + args: args + ),) +} + +#let create-fragment(mol, name: none, links: (:), lewis: (), vertical: false, colors: none) = { + let atom-count = if mol == none or mol == "" { + 1 + } else if type(mol) == str { + mol.len() + } else if type(mol) == content { + 1 + } else { + 1 + } + + ( + ( + type: "fragment", + name: name, + atoms: if type(mol) == array { mol } else { (mol,) }, + colors: colors, + links: links, + lewis: lewis, + vertical: vertical, + count: atom-count, + ), + ) +} + +#let process-atom(atom) = { + if atom == none or atom == "" { return () } + + // If atom is already content type, return as is + if type(atom) == content { + return (atom,) + } + + // String processing + if type(atom) == str { + // Pattern with function name followed by parentheses + let func-pattern = regex("^[a-z-]+\(.*\)$") + if atom.match(func-pattern) != none { + return (eval(atom, mode: "markup"),) + } + + // Split each element separately like CH3 -> [$C$, $H_3$] + let elements = () + let i = 0 + let chars = atom.clusters() + + while i < chars.len() { + 
let char = chars.at(i) + + if char.match(regex("^[A-Z]$")) != none { + let element = char + + if i + 1 < chars.len() and chars.at(i + 1).match(regex("^[a-z]$")) != none { + element += " " + chars.at(i + 1) + i += 1 + } + + let numbers = "" + while i + 1 < chars.len() and chars.at(i + 1).match(regex("^[0-9]$")) != none { + numbers += chars.at(i + 1) + i += 1 + } + if numbers != "" { + element += "_" + numbers + } + + elements.push(eval("$" + element + "$", mode: "markup")) + } else { + elements.push(eval("$" + char + "$", mode: "markup")) + } + + i += 1 + } + + return elements + } + + return (atom,) +} + +// Determine bond connection points based on angle +#let get-bond-connection-points(angle, from-atom-length, to-atom-length) = { + if angle == none { + return (from: none, to: none) + } + + let from-len = if from-atom-length <= 0 { 1 } else { from-atom-length } + let to-len = if to-atom-length <= 0 { 1 } else { to-atom-length } + + let normalized-angle = angle + while normalized-angle > 180deg { normalized-angle -= 360deg } + while normalized-angle < -180deg { normalized-angle += 360deg } + + // -90 < angle <= 90: left to right (from right edge, to left edge) + // Otherwise: right to left (from left edge, to right edge) + let from-point = none + let to-point = none + + if normalized-angle > -90deg and normalized-angle <= 90deg { + from-point = from-len - 1 // Right edge (0-indexed) + to-point = 0 // Left edge + } else { + from-point = 0 // Left edge + to-point = to-len - 1 // Right edge (0-indexed) + } + + return (from: from-point, to: to-point) +} + +#let get-bond-with-angle(bond-type, angle: none, from-atom-length: none, to-atom-length: none) = { + let bond = if bond-type == "double" { + double + } else if bond-type == "triple" { + triple + } else if bond-type == "wedge-filled-right" { + cram-filled-right + } else if bond-type == "wedge-filled-left" { + cram-filled-left + } else if bond-type == "wedge-dashed-right" { + cram-dashed-right + } else if bond-type == 
"wedge-dashed-left" { + cram-dashed-left + } else { + single + } + + // Calculate connection points (only when from/to are specified) + if from-atom-length != none and to-atom-length != none { + let connection-points = get-bond-connection-points(angle, from-atom-length, to-atom-length) + // TODO: Add processing to use connection-points + } + + // Set angle and connection points + if angle != none { + bond(relative: angle) + } else { + bond() + } +} + +#let build-molecule-structure(graph, node-id, visited, angles) = { + if node-id in visited { return () } + visited.push(node-id) + + let elements = () + let node = graph.nodes.at(node-id) + + if node.type == "fragment" { + let atom-raw = node.data.atom + let atom-content = process-atom(atom-raw) + let count = if type(atom-content) == array { atom-content.len() } else { 1 } + + elements += ( + ( + type: "fragment", + name: none, + atoms: atom-content, + colors: none, + links: (:), + lewis: (), + vertical: false, + count: count, + ), + ) + } else if node.type == "implicit" { + } else if node.type == "ring" { + let ring-size = node.data.size + let ring-content = node.data.at("content", default: none) + + let ring-rotation = calculate-ring-rotation(node-id, graph, angles) + + let cycle-body = () + let leading-fragment = none + + // If ring has content, expand it + if ring-content != none and ring-content != (:) { + // ring-content contains complete graph structure (nodes, edges, root, etc.) 
+ if ring-content.at("root", default: none) != none { + // Follow links to find first and last of linear chain + let chain-nodes = () + let current = ring-content.root + let visited-chain = () + + // Follow the chain + while current != none and current not in visited-chain { + visited-chain.push(current) + let node-data = ring-content.nodes.at(current) + chain-nodes.push((id: current, node: node-data)) + + // Find next node + let next = none + for edge in ring-content.edges { + if edge.from == current and edge.to not in visited-chain { + next = edge.to + break + } + } + current = next + } + + // Check if first and last nodes are fragments + if chain-nodes.len() > 0 { + let first-node = chain-nodes.at(0) + let last-node = chain-nodes.at(-1) + + // If first or last is a fragment, extract it + if first-node.node.type == "fragment" or (chain-nodes.len() > 1 and last-node.node.type == "fragment") { + // Extract one as leading-fragment (prioritize first) + let fragment-node = if first-node.node.type == "fragment" { first-node } else { last-node } + let atom-raw = fragment-node.node.data.atom + let atom-content = process-atom(atom-raw) + let count = if type(atom-content) == array { atom-content.len() } else { 1 } + + leading-fragment = ( + type: "fragment", + name: none, + atoms: atom-content, + colors: none, + links: (:), + lewis: (), + vertical: false, + count: count, + ) + + // Build cycle-body with non-fragment elements + let ring-angles = calculate-all-angles(ring-content) + let ring-visited = (fragment-node.id,) + + // Start from the node after the fragment + let found-next = false + for edge in ring-content.edges { + if edge.from == fragment-node.id { + // Add bond (don't use from/to within cycle) + let bond = get-bond-with-angle( + edge.data.at("bondType", default: "single"), + angle: ring-angles.at(str(edge.from) + "->" + str(edge.to), default: none) + ) + cycle-body += bond + + // Expand from next node + let next-elements = build-molecule-structure(ring-content, 
edge.to, ring-visited, ring-angles) + cycle-body += next-elements + found-next = true + break + } + } + + // If fragment is at the end, no edges exist, so add default bonds + if not found-next and cycle-body == () { + // Add single bonds for ring size + for i in range(ring-size) { + cycle-body += single() + } + } + } else { + // If no fragment, proceed normally + let ring-angles = calculate-all-angles(ring-content) + let ring-visited = () + let ring-elements = build-molecule-structure(ring-content, ring-content.root, ring-visited, ring-angles) + cycle-body = ring-elements + } + } else { + // If no chain, use default + for i in range(ring-size) { + cycle-body += single() + } + } + } else if ring-content.at("nodes", default: (:)).len() == 0 { + // If no nodes, default to single bonds only + for i in range(ring-size) { + cycle-body += single() + } + } + } else { + // If no content, default to single bonds only + for i in range(ring-size) { + cycle-body += single() + } + } + + // Mark this ring node itself as visited + visited.push(node-id) + + // If leading-fragment exists, add it first + if leading-fragment != none { + elements += (leading-fragment,) + } + + elements += create-cycle(ring-size, cycle-body, args: (relative: ring-rotation)) + } + + let main-edges = () + let branch-edges = () + + for edge in graph.edges { + if edge.from == node-id and edge.to not in visited { + if edge.data.at("role", default: "main") == "main" { + main-edges.push(edge) + } else if edge.data.role == "branch" { + branch-edges.push(edge) + } + } + } + + for edge in branch-edges { + // Get edge angle + let edge-key = str(edge.from) + "->" + str(edge.to) + let angle = angles.at(edge-key, default: none) + + // Get lengths of connecting atoms + let from-atom = if node.type == "fragment" { node.data.atom } else { "" } + let to-node = graph.nodes.at(edge.to) + let to-atom = if to-node.type == "fragment" { to-node.data.atom } else { "" } + + let from-length = if from-atom == "" { 1 } else if 
type(from-atom) == str { from-atom.len() } else { 1 } + let to-length = if to-atom == "" { 1 } else if type(to-atom) == str { to-atom.len() } else { 1 } + + let bond = get-bond-with-angle( + edge.data.at("bondType", default: "single"), + angle: angle, + from-atom-length: from-length, + to-atom-length: to-length + ) + let branch-elements = build-molecule-structure(graph, edge.to, visited, angles) + + let branch-body = bond + branch-elements + elements += create-branch(branch-body, args: (relative: angle)) + } + + for edge in main-edges { + let edge-key = str(edge.from) + "->" + str(edge.to) + let angle = angles.at(edge-key, default: none) + + // Get lengths of connecting atoms + let from-atom = if node.type == "fragment" { node.data.atom } else { "" } + let to-node = graph.nodes.at(edge.to) + let to-atom = if to-node.type == "fragment" { to-node.data.atom } else { "" } + + let from-length = if from-atom == "" { 1 } else if type(from-atom) == str { from-atom.len() } else { 1 } + let to-length = if to-atom == "" { 1 } else if type(to-atom) == str { to-atom.len() } else { 1 } + + let bond = get-bond-with-angle( + edge.data.at("bondType", default: "single"), + angle: angle, + from-atom-length: from-length, + to-atom-length: to-length + ) + elements += bond + + let next-elements = build-molecule-structure(graph, edge.to, visited, angles) + elements += next-elements + } + + return elements +} + +#let transform(graph) = { + let root = graph.at("root", default: none) + if root == none and graph.nodes.len() > 0 { + for (id, _) in graph.nodes { + if id == "node_0" { + root = id + break + } + } + if root == none { + root = graph.nodes.keys().first() + } + } + + if root == none { + return () + } + + let angles = calculate-all-angles(graph) + + let visited = () + let elements = build-molecule-structure(graph, root, visited, angles) + + return elements +} + +#let transform-molecule(graph) = { + transform(graph) +} \ No newline at end of file From 
b38de8af91769b25f3fd3aaa1c0ca144430c2722 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Mon, 8 Sep 2025 12:39:15 +0900 Subject: [PATCH 02/30] style: move to elements feature --- src/{ => elements}/molecule/iupac-angle.typ | 0 src/{ => elements}/molecule/molecule.typ | 0 src/{ => elements}/molecule/parser.typ | 0 src/{ => elements}/molecule/transformer.typ | 5 ++--- 4 files changed, 2 insertions(+), 3 deletions(-) rename src/{ => elements}/molecule/iupac-angle.typ (100%) rename src/{ => elements}/molecule/molecule.typ (100%) rename src/{ => elements}/molecule/parser.typ (100%) rename src/{ => elements}/molecule/transformer.typ (98%) diff --git a/src/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ similarity index 100% rename from src/molecule/iupac-angle.typ rename to src/elements/molecule/iupac-angle.typ diff --git a/src/molecule/molecule.typ b/src/elements/molecule/molecule.typ similarity index 100% rename from src/molecule/molecule.typ rename to src/elements/molecule/molecule.typ diff --git a/src/molecule/parser.typ b/src/elements/molecule/parser.typ similarity index 100% rename from src/molecule/parser.typ rename to src/elements/molecule/parser.typ diff --git a/src/molecule/transformer.typ b/src/elements/molecule/transformer.typ similarity index 98% rename from src/molecule/transformer.typ rename to src/elements/molecule/transformer.typ index 7667b33..0102dca 100644 --- a/src/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,6 +1,5 @@ -#import "../elements/links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left - -#import "iupac-angle.typ": * +#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left +#import "iupac-angle.typ": calculate-all-angles, calculate-ring-rotation // Inline branch function to avoid circular reference #let create-branch(body, args: (:)) = 
{ From cb6a426aad61463b2ff3a8c9c57a0859eb47d1df Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Mon, 8 Sep 2025 12:39:46 +0900 Subject: [PATCH 03/30] fix: fix public API name --- lib.typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib.typ b/lib.typ index 88471b2..7cc5731 100644 --- a/lib.typ +++ b/lib.typ @@ -1,7 +1,7 @@ #import "@preview/cetz:0.4.1" #import "src/default.typ": default #import "src/utils/utils.typ" -#import "src/molecule/molecule.typ": molecule +#import "src/elements/molecule/molecule.typ": molecule as mol #import "src/drawer.typ" #import "src/drawer.typ": skeletize, draw-skeleton, skeletize-config, draw-skeleton-config #import "src/elements/links.typ": * From 1a3c3e5abd6d03f0277b6eae36cb18832010f9f7 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Tue, 9 Sep 2025 00:04:28 +0900 Subject: [PATCH 04/30] feat: ring prefix with "@" instead of "*" --- src/elements/molecule/parser.typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 6d006c1..d682f30 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -7,7 +7,7 @@ fragment ::= ATOM_STRING label? options? bond ::= BOND_SYMBOL bond_label? options? branch ::= "(" bond molecule ")" - ring ::= "*" DIGIT+ "(" molecule? ")" label? options? + ring ::= "@" DIGIT+ "(" molecule? ")" label? options? 
label ::= ":" IDENTIFIER bond_label ::= "::" IDENTIFIER @@ -291,7 +291,7 @@ #let parse-ring(ctx, parse-mol-fn) = { ctx = skip-whitespace(ctx) - if peek-char(ctx) != "*" { return (none, ctx) } + if peek-char(ctx) != "@" { return (none, ctx) } ctx = advance(ctx) let (sizeStr, newCtx) = parse-digits(ctx) From fd866dd610155bb6aa4093b370b9dda9d1797665 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Tue, 9 Sep 2025 00:06:47 +0900 Subject: [PATCH 05/30] feat: improve connection point process and fragment process in ring --- src/elements/molecule/transformer.typ | 284 ++++++++++---------------- 1 file changed, 103 insertions(+), 181 deletions(-) diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 0102dca..86dff56 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -53,7 +53,6 @@ return (atom,) } - // String processing if type(atom) == str { // Pattern with function name followed by parentheses let func-pattern = regex("^[a-z-]+\(.*\)$") @@ -93,43 +92,59 @@ i += 1 } - return elements } - return (atom,) } -// Determine bond connection points based on angle -#let get-bond-connection-points(angle, from-atom-length, to-atom-length) = { +// Get atom priority and find connection point +#let get-atom-connection-point(fragment, from-end: false) = { + let position = 0 + for atom in fragment { + if atom == "C" or atom == "N" or atom == "O" { + return position + } + position += 1 + } + + return 0 +} + +// Determine bond connection points based on atom priority (for rings) or angle (for non-rings) +#let get-bond-connection-points(angle, from-atom, to-atom, in-ring) = { if angle == none { return (from: none, to: none) } - let from-len = if from-atom-length <= 0 { 1 } else { from-atom-length } - let to-len = if to-atom-length <= 0 { 1 } else { to-atom-length } - - let normalized-angle = angle - while normalized-angle > 180deg { normalized-angle -= 360deg } 
- while normalized-angle < -180deg { normalized-angle += 360deg } - - // -90 < angle <= 90: left to right (from right edge, to left edge) - // Otherwise: right to left (from left edge, to right edge) let from-point = none let to-point = none - if normalized-angle > -90deg and normalized-angle <= 90deg { - from-point = from-len - 1 // Right edge (0-indexed) - to-point = 0 // Left edge + // Outside ring: use angle to determine connection direction + while angle > 180deg { angle -= 360deg } + while angle < -180deg { angle += 360deg } + + if in-ring or angle == 90deg or angle == -90deg { + // Inside ring: use atom priority to determine connection points + let from-pos = get-atom-connection-point(from-atom) + let to-pos = get-atom-connection-point(to-atom) + + // Use the position from the connection point info + from-point = from-pos + to-point = to-pos + } else if angle > -90deg and angle < 90deg { + // Left to right connection + from-point = calc.max(0, from-atom.len() - 1) + to-point = 0 } else { - from-point = 0 // Left edge - to-point = to-len - 1 // Right edge (0-indexed) + // Right to left connection + from-point = 0 + to-point = calc.max(0, to-atom.len() - 1) } return (from: from-point, to: to-point) } -#let get-bond-with-angle(bond-type, angle: none, from-atom-length: none, to-atom-length: none) = { +#let get-bond-with-angle(bond-type, angle: none, from-atom: none, to-atom: none, in-ring: false) = { let bond = if bond-type == "double" { double } else if bond-type == "triple" { @@ -146,21 +161,38 @@ single } - // Calculate connection points (only when from/to are specified) - if from-atom-length != none and to-atom-length != none { - let connection-points = get-bond-connection-points(angle, from-atom-length, to-atom-length) - // TODO: Add processing to use connection-points - } + // Calculate connection points and create bond with appropriate parameters + let bond-args = (:) - // Set angle and connection points + // Add angle if specified (now absolute angle) if 
angle != none { - bond(relative: angle) + bond-args.insert("absolute", angle) + } + + // Add connection points if atoms are specified + if from-atom != none and to-atom != none { + let connection-points = get-bond-connection-points( + angle, from-atom, to-atom, in-ring + ) + + // Add connection points to bond arguments + if connection-points.from != none { + bond-args.insert("from", connection-points.from) + } + if connection-points.to != none { + bond-args.insert("to", connection-points.to) + } + } + + // Return bond with all calculated parameters + if bond-args.len() > 0 { + bond(..bond-args) } else { bond() } } -#let build-molecule-structure(graph, node-id, visited, angles) = { +#let build-molecule-structure(graph, node-id, visited, angles, in-ring: false) = { if node-id in visited { return () } visited.push(node-id) @@ -192,185 +224,77 @@ let ring-rotation = calculate-ring-rotation(node-id, graph, angles) let cycle-body = () - let leading-fragment = none // If ring has content, expand it - if ring-content != none and ring-content != (:) { - // ring-content contains complete graph structure (nodes, edges, root, etc.) 
- if ring-content.at("root", default: none) != none { - // Follow links to find first and last of linear chain - let chain-nodes = () - let current = ring-content.root - let visited-chain = () - - // Follow the chain - while current != none and current not in visited-chain { - visited-chain.push(current) - let node-data = ring-content.nodes.at(current) - chain-nodes.push((id: current, node: node-data)) - - // Find next node - let next = none - for edge in ring-content.edges { - if edge.from == current and edge.to not in visited-chain { - next = edge.to - break - } - } - current = next - } - - // Check if first and last nodes are fragments - if chain-nodes.len() > 0 { - let first-node = chain-nodes.at(0) - let last-node = chain-nodes.at(-1) - - // If first or last is a fragment, extract it - if first-node.node.type == "fragment" or (chain-nodes.len() > 1 and last-node.node.type == "fragment") { - // Extract one as leading-fragment (prioritize first) - let fragment-node = if first-node.node.type == "fragment" { first-node } else { last-node } - let atom-raw = fragment-node.node.data.atom - let atom-content = process-atom(atom-raw) - let count = if type(atom-content) == array { atom-content.len() } else { 1 } - - leading-fragment = ( - type: "fragment", - name: none, - atoms: atom-content, - colors: none, - links: (:), - lewis: (), - vertical: false, - count: count, - ) - - // Build cycle-body with non-fragment elements - let ring-angles = calculate-all-angles(ring-content) - let ring-visited = (fragment-node.id,) - - // Start from the node after the fragment - let found-next = false - for edge in ring-content.edges { - if edge.from == fragment-node.id { - // Add bond (don't use from/to within cycle) - let bond = get-bond-with-angle( - edge.data.at("bondType", default: "single"), - angle: ring-angles.at(str(edge.from) + "->" + str(edge.to), default: none) - ) - cycle-body += bond - - // Expand from next node - let next-elements = build-molecule-structure(ring-content, 
edge.to, ring-visited, ring-angles) - cycle-body += next-elements - found-next = true - break - } - } - - // If fragment is at the end, no edges exist, so add default bonds - if not found-next and cycle-body == () { - // Add single bonds for ring size - for i in range(ring-size) { - cycle-body += single() - } - } - } else { - // If no fragment, proceed normally - let ring-angles = calculate-all-angles(ring-content) - let ring-visited = () - let ring-elements = build-molecule-structure(ring-content, ring-content.root, ring-visited, ring-angles) - cycle-body = ring-elements - } + if ring-content != none and ring-content != (:) and ring-content.at("root", default: none) != none { + let ring-angles = calculate-all-angles(ring-content, is-ring: true) + let ring-visited = () + let ring-elements = build-molecule-structure(ring-content, ring-content.root, ring-visited, ring-angles, in-ring: true) + + // Move leading and trailing fragments + let number = 0 + for i in range(ring-elements.len()) { + let element = ring-elements.at(i) + let type = element.at("type", default: none) + + if type == "fragment" and (number == 0 or number == ring-size) { + elements += (element, ) } else { - // If no chain, use default - for i in range(ring-size) { - cycle-body += single() + if number == ring-size { panic("bonds is too many for ring size: " + str(ring-size)) } + if type == "link" { number += 1 } + if type == "branch" { + let _ = element.at("body").at(0).remove("absolute") } - } - } else if ring-content.at("nodes", default: (:)).len() == 0 { - // If no nodes, default to single bonds only - for i in range(ring-size) { - cycle-body += single() + cycle-body += (element,) } } } else { - // If no content, default to single bonds only for i in range(ring-size) { cycle-body += single() } } - // Mark this ring node itself as visited visited.push(node-id) - // If leading-fragment exists, add it first - if leading-fragment != none { - elements += (leading-fragment,) - } - elements += 
create-cycle(ring-size, cycle-body, args: (relative: ring-rotation)) } - let main-edges = () - let branch-edges = () - + // Process all edges from current node for edge in graph.edges { - if edge.from == node-id and edge.to not in visited { - if edge.data.at("role", default: "main") == "main" { - main-edges.push(edge) - } else if edge.data.role == "branch" { - branch-edges.push(edge) - } - } - } - - for edge in branch-edges { - // Get edge angle - let edge-key = str(edge.from) + "->" + str(edge.to) - let angle = angles.at(edge-key, default: none) - - // Get lengths of connecting atoms - let from-atom = if node.type == "fragment" { node.data.atom } else { "" } - let to-node = graph.nodes.at(edge.to) - let to-atom = if to-node.type == "fragment" { to-node.data.atom } else { "" } - - let from-length = if from-atom == "" { 1 } else if type(from-atom) == str { from-atom.len() } else { 1 } - let to-length = if to-atom == "" { 1 } else if type(to-atom) == str { to-atom.len() } else { 1 } - - let bond = get-bond-with-angle( - edge.data.at("bondType", default: "single"), - angle: angle, - from-atom-length: from-length, - to-atom-length: to-length - ) - let branch-elements = build-molecule-structure(graph, edge.to, visited, angles) - - let branch-body = bond + branch-elements - elements += create-branch(branch-body, args: (relative: angle)) - } - - for edge in main-edges { + if edge.from != node-id or edge.to in visited { continue } + + let role = edge.data.at("role", default: "main") let edge-key = str(edge.from) + "->" + str(edge.to) let angle = angles.at(edge-key, default: none) // Get lengths of connecting atoms - let from-atom = if node.type == "fragment" { node.data.atom } else { "" } + let from-atom = if node.type == "fragment" { process-atom(node.data.atom) } else { "" } let to-node = graph.nodes.at(edge.to) - let to-atom = if to-node.type == "fragment" { to-node.data.atom } else { "" } - - let from-length = if from-atom == "" { 1 } else if type(from-atom) == str { 
from-atom.len() } else { 1 } - let to-length = if to-atom == "" { 1 } else if type(to-atom) == str { to-atom.len() } else { 1 } + let to-atom = if to-node.type == "fragment" { process-atom(to-node.data.atom) } else { "" } + + if role == "branch" { + in-ring = false + } let bond = get-bond-with-angle( edge.data.at("bondType", default: "single"), angle: angle, - from-atom-length: from-length, - to-atom-length: to-length + from-atom: from-atom, + to-atom: to-atom, + in-ring: in-ring ) - elements += bond - let next-elements = build-molecule-structure(graph, edge.to, visited, angles) - elements += next-elements + let next-elements = build-molecule-structure(graph, edge.to, visited, angles, in-ring: in-ring) + + // Different handling for main vs branch + if role == "branch" { + let branch-body = bond + next-elements + elements += create-branch(branch-body) + } else { + // Main edge + elements += bond + elements += next-elements + } } return elements @@ -395,9 +319,7 @@ } let angles = calculate-all-angles(graph) - - let visited = () - let elements = build-molecule-structure(graph, root, visited, angles) + let elements = build-molecule-structure(graph, root, (), angles, in-ring: false) return elements } From 69a355041a0d8b274587ffde0450053e431af625 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Tue, 9 Sep 2025 00:07:17 +0900 Subject: [PATCH 06/30] feat: use absolute angle instead of relative angle --- src/elements/molecule/iupac-angle.typ | 71 ++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index 5b52312..aeea935 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -63,9 +63,9 @@ // Calculate branch angles (relative angles) // Equally spaced in appropriate range based on situation -#let calculate-branch-angles(node-id, graph, edge-index, is-root-node, 
main-chain-length, branch-edges, has-incoming-main, has-outgoing-main) = { +#let calculate-branch-angles(node-id, graph, edge-index, is-root-node, main-chain-length, branch-edges, has-incoming-main, has-outgoing-main, is-in-branch: false) = { let branch-angles = () - let total-branches = branch-edges.len() + let total-branches = branch-edges.len() + if is-in-branch { 1 } else { 0 } if total-branches == 0 { return branch-angles } @@ -107,7 +107,7 @@ } // Calculate angles for all edges from node -#let calculate-edge-angles(node-id, graph, edge-index, is-root-node, main-chain-length, node-info: none) = { +#let calculate-edge-angles(node-id, graph, edge-index, is-root-node, main-chain-length, node-info: none, is-in-branch: false) = { // Get edges and hybridization from node info (calculate defaults if not present) let edges = node-info.at("edges", default: graph.edges.filter(e => e.from == node-id)) @@ -158,7 +158,7 @@ if branch-edges.len() > 0 { let branch-angles-list = calculate-branch-angles( node-id, graph, edge-index, is-root-node, main-chain-length, - branch-edges, has-incoming-main, has-outgoing-main + branch-edges, has-incoming-main, has-outgoing-main, is-in-branch: is-in-branch ) for (index, edge) in branch-edges.enumerate() { let angle = branch-angles-list.at(index) @@ -197,7 +197,7 @@ } // Traverse graph and calculate angles -#let traverse-and-calculate(graph, node-id, visited, edge-index, angles, is-root, node-cache: none) = { +#let traverse-and-calculate(graph, node-id, visited, edge-index, angles, is-root, node-cache: none, is-in-branch: false) = { if node-id in visited { return (angles, edge-index) } visited.push(node-id) @@ -212,7 +212,7 @@ let node-angles = calculate-edge-angles( node-id, graph, edge-index, is-root, main-chain-length, - node-info: node-info + node-info: node-info, is-in-branch: is-in-branch ) // Merge angles (more efficient merging) @@ -229,12 +229,11 @@ if role == "main" { // For main chain, advance the index next-edge-index = 
next-edge-index + 1 - let (new-angles, new-index) = traverse-and-calculate(graph, edge.to, visited, next-edge-index, angles, false, node-cache: node-cache) + let (new-angles, new-index) = traverse-and-calculate(graph, edge.to, visited, next-edge-index, angles, false, node-cache: node-cache, is-in-branch: is-in-branch) angles = new-angles next-edge-index = new-index } else if role == "branch" { - // For branches, start with new index - let (new-angles, _) = traverse-and-calculate(graph, edge.to, visited, 0, angles, false, node-cache: node-cache) + let (new-angles, _) = traverse-and-calculate(graph, edge.to, visited, 0, angles, false, node-cache: node-cache, is-in-branch: true) angles = new-angles } } @@ -243,10 +242,50 @@ return (angles, next-edge-index) } +// Convert relative angles to absolute angles +#let relative-to-absolute(angles, graph) = { + let absolute-angles = (:) + let node-absolute-angles = (:) // Track cumulative angle for each node + + // Start from root with 0deg absolute angle + let root = graph.at("root", default: none) + if root == none and graph.nodes.len() > 0 { + root = if "node_0" in graph.nodes { "node_0" } else { graph.nodes.keys().first() } + } + + if root != none { + node-absolute-angles.insert(root, 0deg) + } + + // Convert each relative angle to absolute + for (edge-key, relative-angle) in angles { + // Parse edge key to get from and to nodes + let parts = edge-key.split("->") + if parts.len() == 2 { + let from-node = parts.at(0) + let to-node = parts.at(1) + + // Get the absolute angle of the from node (default to 0deg) + let from-absolute = node-absolute-angles.at(from-node, default: 0deg) + + // Calculate absolute angle by adding relative to from node's absolute + let absolute-angle = from-absolute + relative-angle + + // Store for this edge + absolute-angles.insert(edge-key, absolute-angle) + + // Update the to-node's absolute angle for next calculations + node-absolute-angles.insert(to-node, absolute-angle) + } + } + + return 
absolute-angles +} + // ===== Main Functions ===== -// Calculate angles for entire graph -#let calculate-all-angles(graph) = { +// Calculate relative angles for entire graph +#let calculate-all-relative-angles(graph, is-ring: false) = { // Efficient search for root node let root = graph.at("root", default: none) if root == none and graph.nodes.len() > 0 { @@ -280,12 +319,20 @@ let angles = (:) let (final-angles, _) = traverse-and-calculate( graph, root, visited, 0, angles, true, - node-cache: if node-cache.len() > 0 { node-cache } else { none } + node-cache: if node-cache.len() > 0 { node-cache } else { none }, + is-in-branch: false ) return final-angles } +// Calculate absolute angles for entire graph +#let calculate-all-angles(graph, is-ring: false) = { + let relative-angles = calculate-all-relative-angles(graph, is-ring: is-ring) + + return relative-to-absolute(relative-angles, graph) +} + // Calculate ring rotation angle (using relative angles only) #let calculate-ring-rotation(node-id, graph, angles) = { let outgoing-edges = graph.edges.filter(e => From 722124491916ede40283987326ee6b581b36de7d Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:30:03 +0900 Subject: [PATCH 07/30] feat: replace an existing molecule element --- lib.typ | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib.typ b/lib.typ index 7cc5731..0a36e4e 100644 --- a/lib.typ +++ b/lib.typ @@ -1,7 +1,7 @@ #import "@preview/cetz:0.4.1" #import "src/default.typ": default #import "src/utils/utils.typ" -#import "src/elements/molecule/molecule.typ": molecule as mol +#import "src/elements/molecule/molecule.typ": molecule #import "src/drawer.typ" #import "src/drawer.typ": skeletize, draw-skeleton, skeletize-config, draw-skeleton-config #import "src/elements/links.typ": * @@ -79,7 +79,6 @@ ), ) } -#let molecule(name: none, links: (:), lewis: (), vertical: false, mol) = fragment(name: name, links: links, lewis: lewis, 
vertical: vertical, mol) /// === Hooks /// Create a hook in the fragment. It allows to connect links to the place where the hook is. From 7e0382f08b0368e39ac8d19c9fa68c753aac68a3 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:42:24 +0900 Subject: [PATCH 08/30] feat: introduce the conceptual design of a syntax for reaction schemes --- src/elements/molecule/parser.typ | 47 ++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index d682f30..68e3a12 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -1,10 +1,24 @@ /* - molecule ::= node? (bond_node_pair)* + // reaction syntax + input ::= reaction + reaction ::= side (ARROW_EXPR side)* + side ::= term ("+" term)* + term ::= COEFFICIENT? molecule + COEFFICIENT ::= DIGIT+ - bond_node_pair::= bond node? - node ::= (fragment | ring) branch* + // arrow expression + ARROW_EXPR ::= COND_BEFORE? ARROW COND_AFTER? + COND_BEFORE ::= "[" TEXT "]" + COND_AFTER ::= "[" TEXT "]" + ARROW ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" - fragment ::= ATOM_STRING label? options? + // molecule syntax + molecule ::= unit (bond unit)* + unit ::= (node | implicit_node) branch* + node ::= fragment | ring + implicit_node ::= ε // empty node + + fragment ::= FRAGMENT label? options? bond ::= BOND_SYMBOL bond_label? options? branch ::= "(" bond molecule ")" ring ::= "@" DIGIT+ "(" molecule? ")" label? options? @@ -13,7 +27,30 @@ bond_label ::= "::" IDENTIFIER options ::= "(" key_value_pair ("," key_value_pair)* ")" key_value_pair::= IDENTIFIER ":" value - value ::= STRING | NUMBER | IDENTIFIER + + // FRAGMENT definition + FRAGMENT ::= MOLECULE | ABBREVIATION + MOLECULE ::= MOLECULE_PART+ CHARGE? + MOLECULE_PART ::= ELEMENT_GROUP | PARENTHETICAL | COMPLEX + ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? 
+ ISOTOPE ::= "^" DIGIT+ + ELEMENT ::= [A-Z][a-z]? + SUBSCRIPT ::= DIGIT+ + PARENTHETICAL ::= "(" MOLECULE ")" SUBSCRIPT? + COMPLEX ::= "[" MOLECULE "]" + CHARGE ::= "^" DIGIT? ("+" | "-") + ABBREVIATION ::= [a-z][A-Za-z]+ + + // bond syntax + BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" + + // remote connection syntax + remote_connection ::= ":" IDENTIFIER "=" ":" IDENTIFIER options? + + // Basic tokens + TEXT ::= [^[\]]+ | [^\s\(\)\[\]:,=\-<>#]+ + IDENTIFIER ::= [a-zA-Z_][a-zA-Z0-9_]* + DIGIT ::= [0-9] */ #let create-parser-context(input, config: (:)) = { From 6df85c7598c6b69beae277ef489ca3b7d49549b6 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:54:48 +0900 Subject: [PATCH 09/30] refactor: update reaction syntax to use operators and enhance fragment definition --- src/elements/molecule/parser.typ | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 68e3a12..828d84a 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -1,22 +1,20 @@ /* // reaction syntax input ::= reaction - reaction ::= side (ARROW_EXPR side)* - side ::= term ("+" term)* + reaction ::= term (OPERATOR term)* term ::= COEFFICIENT? molecule COEFFICIENT ::= DIGIT+ - // arrow expression - ARROW_EXPR ::= COND_BEFORE? ARROW COND_AFTER? - COND_BEFORE ::= "[" TEXT "]" - COND_AFTER ::= "[" TEXT "]" - ARROW ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" + // operator expression + OPERATOR ::= CONDITION? OP_SYMBOL CONDITION? + CONDITION ::= "[" TEXT "]" + OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" | MATH_TEXT // molecule syntax molecule ::= unit (bond unit)* unit ::= (node | implicit_node) branch* node ::= fragment | ring - implicit_node ::= ε // empty node + implicit_node ::= ε fragment ::= FRAGMENT label? options? bond ::= BOND_SYMBOL bond_label? 
options? @@ -29,7 +27,7 @@ key_value_pair::= IDENTIFIER ":" value // FRAGMENT definition - FRAGMENT ::= MOLECULE | ABBREVIATION + FRAGMENT ::= MOLECULE | ABBREVIATION | MATH_TEXT MOLECULE ::= MOLECULE_PART+ CHARGE? MOLECULE_PART ::= ELEMENT_GROUP | PARENTHETICAL | COMPLEX ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? From 883fca2f5a1576954a175b3add1cf6a72f35b77a Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:03:15 +0900 Subject: [PATCH 10/30] feat: refine remote connection syntax --- src/elements/molecule/parser.typ | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 828d84a..1ffd1e2 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -13,11 +13,13 @@ // molecule syntax molecule ::= unit (bond unit)* unit ::= (node | implicit_node) branch* - node ::= fragment | ring + node ::= fragment | ring | label implicit_node ::= ε fragment ::= FRAGMENT label? options? bond ::= BOND_SYMBOL bond_label? options? + BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" + branch ::= "(" bond molecule ")" ring ::= "@" DIGIT+ "(" molecule? ")" label? options? @@ -39,12 +41,6 @@ CHARGE ::= "^" DIGIT? ("+" | "-") ABBREVIATION ::= [a-z][A-Za-z]+ - // bond syntax - BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" - - // remote connection syntax - remote_connection ::= ":" IDENTIFIER "=" ":" IDENTIFIER options? 
- // Basic tokens TEXT ::= [^[\]]+ | [^\s\(\)\[\]:,=\-<>#]+ IDENTIFIER ::= [a-zA-Z_][a-zA-Z0-9_]* From 7cae5974ee9158123ad2c404b606e19cd3f37129 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Thu, 11 Sep 2025 14:37:21 +0900 Subject: [PATCH 11/30] refactor: reimplement parser with parser combinator --- src/elements/molecule/parser.typ | 1160 +++++++++++++----------------- src/utils/parser-combinator.typ | 328 +++++++++ 2 files changed, 827 insertions(+), 661 deletions(-) create mode 100644 src/utils/parser-combinator.typ diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 1ffd1e2..cac4163 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -29,15 +29,15 @@ key_value_pair::= IDENTIFIER ":" value // FRAGMENT definition - FRAGMENT ::= MOLECULE | ABBREVIATION | MATH_TEXT - MOLECULE ::= MOLECULE_PART+ CHARGE? - MOLECULE_PART ::= ELEMENT_GROUP | PARENTHETICAL | COMPLEX + FRAGMENT ::= ATOMS | ABBREVIATION | MATH_TEXT + ATOMS ::= ATOMS_PART+ CHARGE? + ATOMS_PART ::= ELEMENT_GROUP | PARENTHETICAL | COMPLEX ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? ISOTOPE ::= "^" DIGIT+ ELEMENT ::= [A-Z][a-z]? SUBSCRIPT ::= DIGIT+ - PARENTHETICAL ::= "(" MOLECULE ")" SUBSCRIPT? - COMPLEX ::= "[" MOLECULE "]" + PARENTHETICAL ::= "(" ATOMS ")" SUBSCRIPT? + COMPLEX ::= "[" ATOMS "]" CHARGE ::= "^" DIGIT? 
("+" | "-") ABBREVIATION ::= [a-z][A-Za-z]+ @@ -47,703 +47,541 @@ DIGIT ::= [0-9] */ -#let create-parser-context(input, config: (:)) = { - let mainInput = input - let remoteConnections = () - - if type(input) == content { - let lines = input.text.split("\n").filter(line => line.trim() != "") - if lines.len() > 0 { - mainInput = lines.at(0) - remoteConnections = lines.slice(1) - } - } else if type(input) == str { - mainInput = input - } else { - mainInput = str(input) - } - - ( - input: mainInput, - position: 0, - length: mainInput.len(), - graph: ( - nodes: (:), - edges: (), - nodeCounter: 0, - edgeCounter: 0, - root: none, - labels: (:), - bondLabels: (:), - ), - lastNodeId: none, - config: config, - remoteConnections: remoteConnections, - ) -} - -// Create sub-context (for parsing ring content) -#let create-sub-context(parent-ctx) = { - ( - input: parent-ctx.input, - position: parent-ctx.position, - length: parent-ctx.length, - graph: ( - nodes: (:), - edges: (), - nodeCounter: 0, - edgeCounter: 0, - root: none, - labels: (:), - bondLabels: (:), - ), - lastNodeId: none, - config: parent-ctx.config, - remoteConnections: (), - ) +#import "../../utils/parser-combinator.typ": * + +// ==================== Utilities ==================== + +#let digit = satisfy(c => c >= "0" and c <= "9", name: "digit") +#let letter = satisfy(c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), name: "letter") +#let uppercase = satisfy(c => c >= "A" and c <= "Z", name: "uppercase") +#let lowercase = satisfy(c => c >= "a" and c <= "z", name: "lowercase") +#let alphanum = satisfy(c => { + (c >= "0" and c <= "9") or (c >= "a" and c <= "z") or (c >= "A" and c <= "Z") +}, name: "alphanum") +#let whitespace = one-of(" \t\n\r") +#let ws = many(whitespace) +#let space = one-of(" \t") +#let newline = choice(str("\r\n"), char("\n")) +#let lexeme(p) = map(seq(p, ws), r => r.at(0)) +#let token(s) = lexeme(str(s)) + +// Integer +#let integer = { + let sign = optional(one-of("+-")) + let digits = 
some(digit) + + map(seq(sign, digits), r => { + let (s, d) = r + let n = int(d.join()) + if s == "-" { -n } else { n } + }) } -#let create-node(nodeType: "fragment", data: (:)) = { - ( - id: none, - type: nodeType, - data: data, - ) -} - -#let create-edge(fromId, toId, edgeType: "bond", data: (:)) = { - ( - id: none, - from: fromId, - to: toId, - type: edgeType, - data: data, - ) -} - -#let add-node-to-graph(ctx, node) = { - let nodeId = "node_" + str(ctx.graph.nodeCounter) - node.id = nodeId - ctx.graph.nodeCounter += 1 - ctx.graph.nodes.insert(nodeId, node) +// Identifier +#let identifier = { + let first = choice(letter, char("_")) + let rest = many(choice(alphanum, char("_"))) - if "label" in node.data and node.data.label != none { - ctx.graph.labels.insert(node.data.label, nodeId) - } - - if ctx.graph.root == none { - ctx.graph.root = nodeId - } - - return (nodeId, ctx) + map(seq(first, rest), r => { + let (f, rs) = r + f + rs.join() + }) } -#let add-edge-to-graph(ctx, edge) = { - let edgeId = "edge_" + str(ctx.graph.edgeCounter) - edge.id = edgeId - ctx.graph.edgeCounter += 1 - ctx.graph.edges.push(edge) - - if "label" in edge.data and edge.data.label != none { - ctx.graph.bondLabels.insert(edge.data.label, edgeId) - } - - return ctx -} - -#let peek-char(ctx) = { - if ctx.position >= ctx.length { return none } - ctx.input.at(ctx.position) -} - -#let peek-string(ctx, length) = { - let end = calc.min(ctx.position + length, ctx.length) - ctx.input.slice(ctx.position, end) -} - -#let advance(ctx, count: 1) = { - ctx.position += count - ctx -} - -#let skip-whitespace(ctx) = { - while ctx.position < ctx.length { - let char = peek-char(ctx) - if char != " " and char != "\t" { break } - ctx = advance(ctx) - } - ctx -} - -#let ATOM_STRING_PATTERN = regex("^([A-Z][a-z]?(\d+)?)+(_[^\s\(\)\[\]:,=\-<>#]+|\^[^\s\(\)\[\]:,=\-<>#]+)*") -#let IDENTIFIER_PATTERN = regex("^[a-zA-Z_][a-zA-Z0-9_]*") -#let DIGIT_PATTERN = regex("^\d+") - -#let parse-identifier(ctx) = { - let 
remaining = ctx.input.slice(ctx.position) - let match = remaining.match(IDENTIFIER_PATTERN) - - if match != none and match.start == 0 { - ctx.position += match.text.len() - return (match.text, ctx) - } - - return (none, ctx) +// String with escapes +#let string-lit(quote: "\"") = { + let escape = map(seq(char("\\"), any()), r => { + let (_, c) = r + if c == "n" { "\n" } + else if c == "t" { "\t" } + else if c == "r" { "\r" } + else if c == "\\" { "\\" } + else if c == quote { quote } + else { c } + }) + + let normal = none-of(quote + "\\") + let char-parser = choice(escape, normal) + + map(between(char(quote), char(quote), many(char-parser)), chars => chars.join()) } -#let parse-digits(ctx) = { - let remaining = ctx.input.slice(ctx.position) - let match = remaining.match(DIGIT_PATTERN) - - if match != none and match.start == 0 { - ctx.position += match.text.len() - return (match.text, ctx) - } - - return (none, ctx) -} +// ==================== Fragment Components ==================== -#let parse-value(ctx) = { - ctx = skip-whitespace(ctx) - - if peek-char(ctx) == "\"" { - ctx = advance(ctx) - let start = ctx.position - while ctx.position < ctx.length and peek-char(ctx) != "\"" { - ctx = advance(ctx) +// ELEMENT ::= [A-Z][a-z]? 
+#let element-parser = label( + map( + seq( + uppercase, + optional(lowercase), + ), + parts => { + let (upper, lower) = parts + if lower != none { upper + lower } else { upper } } - let value = ctx.input.slice(start, ctx.position) - if peek-char(ctx) == "\"" { - ctx = advance(ctx) + ), + "element symbol (e.g., H, Ca, Fe)" +) + +// SUBSCRIPT ::= DIGIT+ +#let subscript-parser = label( + map( + some(digit), + digits => int(digits.join()) + ), + "subscript number" +) + +// ISOTOPE ::= "^" DIGIT+ +#let isotope-parser = label( + map( + seq(char("^"), some(digit)), + parts => { + let (_, digits) = parts + (type: "isotope", value: int(digits.join())) } - return (value, ctx) - } - - let (ident, newCtx) = parse-identifier(ctx) - if ident != none { - return (ident, newCtx) + ), + "isotope notation (e.g., ^14, ^235)" +) + +// CHARGE ::= "^" DIGIT? ("+" | "-") +#let charge-parser = label( + map( + seq(char("^"), optional(digit), choice(char("+"), char("-"))), + parts => { + let (_, d, sign) = parts + d + sign + } + ), + "charge notation (e.g., ^+, ^2-, ^3+)" +) + +// ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? 
+#let element-group-parser = map( + seq(optional(isotope-parser), element-parser, optional(subscript-parser)), + parts => { + let (isotope, element, subscript) = parts + ( + type: "element-group", + isotope: isotope, + element: element, + subscript: subscript + ) } - - let start = ctx.position - let parenDepth = 0 - while ctx.position < ctx.length { - let char = peek-char(ctx) - if parenDepth == 0 and (char == "," or char == ")") { break } - if char == "(" { parenDepth += 1 } - if char == ")" { parenDepth -= 1 } - ctx = advance(ctx) +) + +// ABBREVIATION ::= [a-z][A-Za-z]+ +#let abbreviation-parser = map( + seq(lowercase, some(letter)), + parts => { + let (first, rest) = parts + (type: "abbreviation", value: first + rest.join()) } - - let value = ctx.input.slice(start, ctx.position).trim() - return (value, ctx) -} - -#let parse-options(ctx) = { - if peek-char(ctx) != "(" { return (none, ctx) } - ctx = advance(ctx) - - let options = (:) - - while ctx.position < ctx.length { - ctx = skip-whitespace(ctx) - - if peek-char(ctx) == ")" { - ctx = advance(ctx) - break - } - - let (key, newCtx) = parse-identifier(ctx) - if key == none { break } - ctx = newCtx - - ctx = skip-whitespace(ctx) - if peek-char(ctx) != ":" { break } - ctx = advance(ctx) - - let (value, newCtx2) = parse-value(ctx) - ctx = newCtx2 - options.insert(key, value) - - ctx = skip-whitespace(ctx) - if peek-char(ctx) == "," { - ctx = advance(ctx) +) + +// MATH_TEXT ::= "$" [^$]+ "$" +#let math-text-parser = label( + map( + seq( + char("$"), + some(none-of("$")), + char("$") + ), + parts => { + let (_, chars, _) = parts + (type: "math-text", value: chars.join()) } - } - - return (options, ctx) -} - -#let parse-label(ctx) = { - if peek-char(ctx) != ":" { return (none, ctx) } - if peek-string(ctx, 2) == "::" { return (none, ctx) } - - ctx = advance(ctx) - return parse-identifier(ctx) + ), + "math text notation (e.g., $\\Delta$, $\\mu$)" +) + +#let parenthetical-parser(atoms-parser) = { + label( + map( + seq( + 
char("("), + lazy(() => atoms-parser()), + char(")"), + optional(subscript-parser) + ), + parts => { + let (_, atoms, _, subscript) = parts + (type: "parenthetical", atoms: atoms, subscript: subscript) + } + ), + "parenthetical group (e.g., (OH)2, (NH4)2)" + ) } -#let parse-bond-label(ctx) = { - if peek-string(ctx, 2) != "::" { return (none, ctx) } - - ctx = advance(ctx, count: 2) - return parse-identifier(ctx) +#let complex-parser(atoms-parser) = { + label( + map( + seq( + char("["), + lazy(() => atoms-parser()), + char("]") + ), + parts => { + let (_, atoms, _) = parts + (type: "complex", atoms: atoms) + } + ), + "complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+)" + ) } -#let parse-fragment(ctx) = { - ctx = skip-whitespace(ctx) - - let remaining = ctx.input.slice(ctx.position) - let atomMatch = remaining.match(ATOM_STRING_PATTERN) - - if atomMatch == none or atomMatch.start != 0 { - return (none, ctx) - } - - let atom = atomMatch.text - ctx.position += atom.len() - - ctx = skip-whitespace(ctx) - let (label, newCtx) = parse-label(ctx) - if label != none { - ctx = newCtx - } - - ctx = skip-whitespace(ctx) - let options = (:) - - let node = create-node( - nodeType: "fragment", - data: (atom: atom, label: label, options: options) +// Forward declarations for recursive parsers +#let atoms-part-parser(atoms-parser) = choice( + element-group-parser, + parenthetical-parser(atoms-parser), + complex-parser(atoms-parser) +) + +#let atoms-parser() = { + label( + map( + seq(some(atoms-part-parser(atoms-parser)), optional(charge-parser)), + parts => { + let (parts, charge) = parts + (type: "atoms", parts: parts, charge: charge) + } + ), + "atoms composition" ) - - return (node, ctx) } -// Parser functions that need mutual recursion -#let parse-ring(ctx, parse-mol-fn) = { - ctx = skip-whitespace(ctx) - - if peek-char(ctx) != "@" { return (none, ctx) } - ctx = advance(ctx) - - let (sizeStr, newCtx) = parse-digits(ctx) - if sizeStr == none { return (none, ctx) } - ctx = newCtx 
- let size = int(sizeStr) - - // Optional content within parentheses - let ringContent = none - ctx = skip-whitespace(ctx) - if peek-char(ctx) == "(" { - ctx = advance(ctx) - - // Create sub-context for ring content - let sub-ctx = create-sub-context(ctx) - - // Parse in sub-context (into independent graph) - let (innerMol, newSubCtx) = parse-mol-fn(sub-ctx, parse-mol-fn: parse-mol-fn) - if innerMol != none { - // Save complete graph structure (not just metadata) - ringContent = newSubCtx.graph - // Update parent context position (up to closing parenthesis) - ctx.position = newSubCtx.position - } - - ctx = skip-whitespace(ctx) - if peek-char(ctx) != ")" { return (none, ctx) } - ctx = advance(ctx) // Consume closing parenthesis +// FRAGMENT ::= ATOMS | ABBREVIATION | MATH_TEXT +#let fragment-content-parser = choice( + atoms-parser(), + abbreviation-parser, + math-text-parser, + element-parser // Fallback for simple elements +) + +// IDENTIFIER ::= [a-zA-Z_][a-zA-Z0-9_]* +#let identifier-parser = map( + seq( + satisfy(c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z") or c == "_", name: "id-first"), + many(alphanum) + ), + parts => { + let (first, rest) = parts + (type: "identifier", value: first + rest.join()) } - - ctx = skip-whitespace(ctx) - let (label, newCtx3) = parse-label(ctx) - if label != none { - ctx = newCtx3 +) + +// label ::= ":" IDENTIFIER +#let label-parser = map( + seq(char(":"), identifier-parser), + parts => { + let (_, id) = parts + (type: "label", name: id.value) } - - ctx = skip-whitespace(ctx) - let options = (:) - if peek-char(ctx) == "(" { - let (opts, newCtx4) = parse-options(ctx) - if opts != none { - options = opts - ctx = newCtx4 +) + +// ==================== Options ==================== + +// Simple value parser +#let value-parser = choice( + map(some(digit), ds => int(ds.join())), + identifier-parser +) + +// key_value_pair ::= IDENTIFIER ":" value +#let key-value-pair-parser = label( + map( + seq(identifier-parser, token(":"), 
value-parser), + parts => { + let (key, _, value) = parts + (key: key.value, value: value) } - } - - let node = create-node( - nodeType: "ring", - data: (size: size, content: ringContent, label: label, options: options) - ) - - return (node, ctx) -} - -#let parse-bond(ctx) = { - ctx = skip-whitespace(ctx) - - let bondType = none - let twoChar = peek-string(ctx, 2) - - if twoChar == ":>" { - bondType = "wedge-dashed-right" - ctx = advance(ctx, count: 2) - } else if twoChar == "<:" { - bondType = "wedge-dashed-left" - ctx = advance(ctx, count: 2) - } else { - let char = peek-char(ctx) - if char == "-" { - bondType = "single" - ctx = advance(ctx) - } else if char == "=" { - bondType = "double" - ctx = advance(ctx) - } else if char == "#" { - bondType = "triple" - ctx = advance(ctx) - } else if char == ">" { - bondType = "wedge-filled-right" - ctx = advance(ctx) - } else if char == "<" { - bondType = "wedge-filled-left" - ctx = advance(ctx) + ), + "key-value pair (e.g., color: red, angle: 45)" +) + +// options ::= "(" key_value_pair ("," key_value_pair)* ")" +#let options-parser = label( + map( + seq(char("("), sep-by(key-value-pair-parser, token(",")), char(")")), + parts => { + let (_, pairs, _) = parts + (type: "options", pairs: pairs) } - } - - if bondType == none { return (none, ctx) } - - ctx = skip-whitespace(ctx) - let (bondLabel, newCtx) = parse-bond-label(ctx) - if bondLabel != none { - ctx = newCtx - } - - ctx = skip-whitespace(ctx) - let options = (:) - if peek-char(ctx) == "(" { - let saved = ctx.position - ctx = advance(ctx) - ctx = skip-whitespace(ctx) - let char = peek-char(ctx) - let twoChar2 = peek-string(ctx, 2) - let isBond = char == "-" or char == "=" or char == "#" or char == ">" or char == "<" or twoChar2 == ":>" or twoChar2 == "<:" - - ctx.position = saved - if not isBond { - let (opts, newCtx2) = parse-options(ctx) - if opts != none { - options = opts - ctx = newCtx2 - } + ), + "options in parentheses" +) + +// ==================== Fragment 
==================== + +#let process-atom(parts) = { + let type = parts.type + + if type == "atoms" { + let base = parts.parts.map(process-atom).join() + if parts.charge != none { + math.attach(base, tr: eval("$" + parts.charge + "$")) + } else { + base } + } else if type == "abbreviation" { + text(parts.value) + } else if type == "math-text" { + eval(parts.value) + } else if type == "element-group" { + math.attach(parts.element, tl: [#parts.isotope], br: [#parts.subscript]) + } else if type == "parenthetical" { + let inner = process-atom(parts.atoms) + math.attach([(#inner)], br: [#parts.subscript]) + } else if type == "complex" { + let inner = process-atom(parts.atoms) + [\[#inner\]] + } else { + "unkown type: " + type } - - return ((bondType: bondType, label: bondLabel, options: options), ctx) } -#let parse-branch(ctx, parentId, parse-mol-fn) = { - if peek-char(ctx) != "(" { return ((), ctx) } - - let saved = ctx.position - ctx = advance(ctx) - ctx = skip-whitespace(ctx) - - let (bond, newCtx) = parse-bond(ctx) - if bond == none { - ctx.position = saved - return ((), ctx) - } - ctx = newCtx - - let savedLastNode = ctx.lastNodeId - ctx.lastNodeId = none - - let (branchMol, newCtx2) = parse-mol-fn(ctx, parse-mol-fn: parse-mol-fn) - ctx = newCtx2 - - ctx.lastNodeId = savedLastNode - - ctx = skip-whitespace(ctx) - if peek-char(ctx) != ")" { - ctx.position = saved - return ((), ctx) - } - ctx = advance(ctx) - - // If branch has no atoms (only bonds), create implicit node - if (branchMol == none or branchMol.root == none) and bond != none and parentId != none { - // Create implicit node - let implicitNode = create-node( - nodeType: "implicit", - data: (atom: none, label: none, options: (:)) - ) - let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) - ctx = ctx3 - - // Create edge from parent node to implicit node - let edge = create-edge( - parentId, - implicitId, - edgeType: "bond", - data: ( - bondType: bond.bondType, - label: bond.label, - options: 
bond.options, - role: "branch" +// fragment ::= FRAGMENT label? options? +#let fragment-parser = label( + map( + seq(fragment-content-parser, optional(label-parser), optional(options-parser)), + parts => { + let (content, label, options) = parts + ( + type: "fragment", + name: process-atom(content), + label: label, + options: options ) - ) - ctx = add-edge-to-graph(ctx, edge) - } else if branchMol != none and branchMol.root != none and parentId != none { - // branchMol.root is the node ID - let edge = create-edge( - parentId, - branchMol.root, - edgeType: "bond", - data: ( - bondType: bond.bondType, - label: bond.label, - options: bond.options, - role: "branch" - ) - ) - ctx = add-edge-to-graph(ctx, edge) - } - - return ((bond: bond, molecule: branchMol), ctx) -} - -#let parse-node(ctx, parse-mol-fn) = { - ctx = skip-whitespace(ctx) - - let node = none - let nodeId = none - - let (ringNode, newCtx) = parse-ring(ctx, parse-mol-fn) - if ringNode != none { - let (id, ctx2) = add-node-to-graph(newCtx, ringNode) - nodeId = id - ctx = ctx2 - } else { - let (fragmentNode, newCtx2) = parse-fragment(ctx) - if fragmentNode != none { - let (id, ctx3) = add-node-to-graph(newCtx2, fragmentNode) - nodeId = id - ctx = ctx3 } + ), + "molecular fragment" +) + +// ==================== Bonds ==================== + +// BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" +#let bond-symbol-parser = choice( + str("->"), // Arrow prevention + str("=>"), // Arrow prevention + str(":>"), + str("<:"), + str("|>"), + str("<|"), + char("="), + char("#"), + char("-"), + char(">"), + char("<") +) + +// bond_label ::= "::" IDENTIFIER +#let bond-label-parser = map( + seq(str("::"), identifier-parser), + parts => { + let (_, id) = parts + (type: "bond-label", name: id.value) } - - if nodeId == none { return (none, ctx) } - - let branches = () - while true { - let (branch, newCtx) = parse-branch(ctx, nodeId, parse-mol-fn) - if branch == () { break } - branches.push(branch) - ctx 
= newCtx - } - - return (nodeId, ctx) -} - -#let parse-bond-node-pair(ctx, parse-mol-fn) = { - ctx = skip-whitespace(ctx) - - let (bond, newCtx) = parse-bond(ctx) - if bond == none { return (none, ctx) } - ctx = newCtx - - let (nodeId, newCtx2) = parse-node(ctx, parse-mol-fn) - ctx = newCtx2 - - if nodeId == none { - // Check if there's a branch instead of a node - if peek-char(ctx) == "(" { - // Create an implicit node to attach the branch to - let implicitNode = create-node( - nodeType: "implicit", - data: (atom: none, label: none, options: (:)) +) + +// bond ::= BOND_SYMBOL bond_label? options? +#let bond-parser = label( + map( + seq(bond-symbol-parser, optional(bond-label-parser), optional(options-parser)), + parts => { + let (symbol, label, options) = parts + ( + type: "bond", + symbol: symbol, + label: label, + options: options ) - let (id, ctx3) = add-node-to-graph(ctx, implicitNode) - nodeId = id - ctx = ctx3 - - // Parse branches attached to this implicit node - while peek-char(ctx) == "(" { - let (branch, branchCtx) = parse-branch(ctx, nodeId, parse-mol-fn) - if branch == () { break } - ctx = branchCtx - } - } else { - // Create a simple implicit node - let implicitNode = create-node( - nodeType: "implicit", - data: (atom: none, label: none, options: (:)) + } + ), + "chemical bond" +) + +// ==================== Rings ==================== + +// ring ::= "@" DIGIT+ "(" molecule? ")" label? options? 
+#let ring-parser(mol-parser) = label( + lazy(() => map( + seq( + char("@"), + some(digit), + optional( + seq( + char("("), + mol-parser, + char(")"), + ), + ), + optional(label-parser), + optional(options-parser) + ), + parts => { + let (_, digits, mol, lbl, opts) = parts + ( + type: "cycle", + faces: int(digits.join()), + body: mol, + label: lbl, + options: opts ) - let (id, ctx3) = add-node-to-graph(ctx, implicitNode) - nodeId = id - ctx = ctx3 } + )), + "ring notation (e.g., @6, @5(C-C-C-C-C))" +) + +// ==================== Molecules ==================== + +// node ::= fragment | ring | label +#let node-parser(mol-parser) = choice( + fragment-parser, + ring-parser(mol-parser), + label-parser +) + +// branch ::= "(" bond molecule ")" +#let branch-parser(mol-parser) = map( + seq( + char("("), + bond-parser, + mol-parser, + char(")") + ), + parts => { + let (_, bond, molecule, _) = parts + (type: "branch", bond: bond, body: molecule) } - - return ((bond: bond, nodeId: nodeId), ctx) -} - -#let process-remote-connections(ctx) = { - for connection in ctx.remoteConnections { - let parts = connection.split("=") - if parts.len() != 2 { continue } - - let fromPart = parts.at(0).trim() - let toPart = parts.at(1).trim() - - if fromPart.starts-with(":") and toPart.starts-with(":") { - let fromLabel = fromPart.slice(1) - let toLabel = toPart.slice(1) - - let options = (:) - let parenIdx = toLabel.position("(") - if parenIdx != none { - toLabel = toLabel.slice(0, parenIdx) - } - - let fromId = ctx.graph.labels.at(fromLabel, default: none) - let toId = ctx.graph.labels.at(toLabel, default: none) - - if fromId != none and toId != none { - let edge = create-edge( - fromId, - toId, - edgeType: "bond", - data: ( - bondType: "double", - role: "remote", - options: options - ) +) + +// unit ::= (node | implicit_node) branch* +#let unit-parser(mol-parser) = map( + seq(optional(node-parser(mol-parser)), many(branch-parser(mol-parser))), + parts => { + let (node, branches) = parts + ( + 
type: "unit", + node: if node == none { (type: "implicit") } else { node }, + branches: branches + ) + } +) + +// molecule ::= unit (bond unit)* +#let molecule-parser() = { + // Create a lazy reference to itself + let self = lazy(() => molecule-parser()) + + label( + map( + seq( + unit-parser(self), + many(seq(bond-parser, unit-parser(self))) + ), + nodes => { + let (first, rest) = nodes + ( + type: "molecule", + first: first, + rest: rest ) - ctx = add-edge-to-graph(ctx, edge) } - } - } - return ctx + ), + "molecule structure" + ) } -// Unified parse-molecule function -#let parse-molecule(inputOrCtx, config: (:), parse-mol-fn: none) = { - // Set parse-mol-fn to self if not provided - if parse-mol-fn == none { - parse-mol-fn = parse-molecule - } - - // Determine if this is an initial call (with string input) or recursive call (with context) - let ctx = if type(inputOrCtx) == str { - // Initial call with string input - create-parser-context(inputOrCtx, config: config) - } else { - // Recursive call with context - inputOrCtx - } - - let initialNodeCount = ctx.graph.nodeCounter - let initialEdgeCount = ctx.graph.edgeCounter - let localRoot = none - - let (firstNodeId, newCtx) = parse-node(ctx, parse-mol-fn) - if firstNodeId != none { - ctx = newCtx - ctx.lastNodeId = firstNodeId - localRoot = firstNodeId - if ctx.graph.root == none { - ctx.graph.root = firstNodeId - } - } else { - // Check if input starts with branch ( - if peek-char(ctx) == "(" { - // Create implicit node for branch-starting input - let implicitNode = create-node( - nodeType: "implicit", - data: (atom: none, label: none, options: (:)) - ) - let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) - ctx = ctx3 - ctx.lastNodeId = implicitId - localRoot = implicitId - if ctx.graph.root == none { - ctx.graph.root = implicitId - } - - // Parse branches attached to this implicit node - while peek-char(ctx) == "(" { - let (branch, branchCtx) = parse-branch(ctx, implicitId, parse-mol-fn) - if branch == 
() { break } - ctx = branchCtx - } - } else { - let savedPos = ctx.position - let (testBond, testCtx) = parse-bond(ctx) - ctx.position = savedPos - - if testBond != none { - let implicitNode = create-node( - nodeType: "implicit", - data: (atom: none, label: none, options: (:)) - ) - let (implicitId, ctx3) = add-node-to-graph(ctx, implicitNode) - ctx = ctx3 - ctx.lastNodeId = implicitId - localRoot = implicitId - if ctx.graph.root == none { - ctx.graph.root = implicitId - } - } +// ==================== Reactions ==================== + +// COEFFICIENT ::= DIGIT+ +#let coefficient-parser = label( + map( + some(digit), + digits => (type: "coefficient", value: int(digits.join())) + ), + "stoichiometric coefficient" +) + +// OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" +#let op-symbol-parser = choice( + str("<=>"), + str("-->"), + str("->"), + str("=>"), + str("⇌"), + str("→"), + str("⇄"), + char("+") +) + +// CONDITION ::= "[" TEXT "]" +#let condition-parser = label( + map( + seq(char("["), many(none-of("]")), char("]")), + parts => { + let (_, chars, _) = parts + (type: "condition", text: chars.join()) } + ), + "reaction condition (e.g., [heat], [catalyst])" +) + +// OPERATOR ::= CONDITION? OP_SYMBOL CONDITION? 
+#let operator-parser = map( + seq(ws, optional(condition-parser), op-symbol-parser, optional(condition-parser), ws), + parts => { + let (_, cond1, symbol, cond2, _) = parts + ( + type: "operator", + condition-before: cond1, + symbol: symbol, + condition-after: cond2 + ) } - - while ctx.position < ctx.length { - ctx = skip-whitespace(ctx) - - if peek-char(ctx) == ")" { break } - - let (pair, newCtx2) = parse-bond-node-pair(ctx, parse-mol-fn) - if pair == none { break } - ctx = newCtx2 - - if ctx.lastNodeId != none and pair.nodeId != none { - let edge = create-edge( - ctx.lastNodeId, - pair.nodeId, - edgeType: "bond", - data: ( - bondType: pair.bond.bondType, - label: pair.bond.label, - options: pair.bond.options, - role: "main" - ) +) + +// term ::= COEFFICIENT? molecule +#let term-parser = label( + map( + seq(optional(coefficient-parser), molecule-parser()), + parts => { + let (coeff, mol) = parts + ( + type: "term", + coefficient: coeff, + molecule: mol ) - ctx = add-edge-to-graph(ctx, edge) } - - if localRoot == none { - localRoot = pair.nodeId - } - - ctx.lastNodeId = pair.nodeId - } - - // Return different results based on whether this is initial or recursive call - if type(inputOrCtx) == str { - // Initial call - process remote connections and return the graph - if ctx.remoteConnections.len() > 0 { - ctx = process-remote-connections(ctx) + ), + "reaction term" +) + +// reaction ::= term (OPERATOR term)* +#let reaction-parser = label( + map( + seq(term-parser, many(seq(operator-parser, term-parser))), + parts => { + let (first, rest) = parts + let terms = (first,) + let edges = () + for (operator, term) in rest { + terms.push(term) + edges.push((..operator, from: terms.len() - 1, to: terms.len())) + } + ( + type: "reaction", + terms: terms, + edges: edges + ) } - return ctx.graph - } else { - // Recursive call - return molecule info and context - let molecule = ( - nodes: ctx.graph.nodes.pairs().filter(p => { - let nodeNum = int(p.at(0).slice(5)) - nodeNum >= 
initialNodeCount - }).len(), - root: localRoot - ) - return (molecule, ctx) - } -} \ No newline at end of file + ), + "chemical reaction" +) + +// ==================== Parse Functions ==================== + +#let alchemist-parser(input) = { + let full = map(seq(reaction-parser, eof()), r => r.at(0)) + parse(full, input) +} diff --git a/src/utils/parser-combinator.typ b/src/utils/parser-combinator.typ new file mode 100644 index 0000000..8ace06e --- /dev/null +++ b/src/utils/parser-combinator.typ @@ -0,0 +1,328 @@ +// Parse state +#let state(input, pos: 0) = ( + input: input, + pos: pos, + len: input.len(), + at: self => if self.pos < self.len { self.input.at(self.pos) } else { none }, + peek: (self, n: 1) => { + if self.pos + n <= self.len { + self.input.slice(self.pos, self.pos + n) + } else { + none + } + }, + advance: (self, n: 1) => state(self.input, pos: self.pos + n), + remaining: self => self.input.slice(self.pos), + is-eof: self => self.pos >= self.len, +) + +// Result types +#let ok(value, state) = (ok: true, value: value, state: state) +#let err(msg, state) = (ok: false, error: msg, state: state) + +// Parser type +#let parser(name, fn) = (name: name, run: fn) + +// ==================== Basic Parsers ==================== + +// Match any character +#let any() = parser("any", s => { + let c = (s.at)(s) + if c != none { + ok(c, (s.advance)(s)) + } else { + err("end of input", s) + } +}) + +// Match specific character +#let char(c) = parser("char(" + c + ")", s => { + let ch = (s.at)(s) + if ch == c { + ok(c, (s.advance)(s)) + } else { + err("expected " + c, s) + } +}) + +// Match string +#let str(text) = parser("str(" + text + ")", s => { + let peek = (s.peek)(s, n: text.len()) + if peek == text { + ok(text, (s.advance)(s, n: text.len())) + } else { + err("expected " + text, s) + } +}) + +// Match one of characters +#let one-of(chars) = parser("one-of", s => { + let c = (s.at)(s) + if c != none and chars.contains(c) { + ok(c, (s.advance)(s)) + } else { + 
err("expected one of " + chars, s) + } +}) + +// Match none of characters +#let none-of(chars) = parser("none-of", s => { + let c = (s.at)(s) + if c != none and not chars.contains(c) { + ok(c, (s.advance)(s)) + } else { + err("unexpected " + repr(c), s) + } +}) + +// Match with predicate +#let satisfy(pred, name: "satisfy") = parser(name, s => { + let c = (s.at)(s) + if c != none and pred(c) { + ok(c, (s.advance)(s)) + } else { + err(name + " failed", s) + } +}) + +// Match end of input +#let eof() = parser("eof", s => { + if (s.is-eof)(s) { + ok(none, s) + } else { + err("expected end of input", s) + } +}) + +// ==================== Combinators ==================== + +// Map result +#let map(p, f) = parser("map", s => { + let r = (p.run)(s) + if r.ok { + ok(f(r.value), r.state) + } else { + r + } +}) + +// Sequence parsers (variadic) +#let seq(..parsers) = { + let ps = parsers.pos() + if ps.len() == 0 { return parser("empty", s => ok((), s)) } + if ps.len() == 1 { return ps.at(0) } + + parser("seq", s => { + let results = () + let current = s + + for p in ps { + let r = (p.run)(current) + if not r.ok { return r } + results.push(r.value) + current = r.state + } + + ok(results, current) + }) +} + +// Choice (variadic) +#let choice(..parsers) = { + let ps = parsers.pos() + if ps.len() == 0 { panic("choice requires at least one parser") } + if ps.len() == 1 { return ps.at(0) } + + parser("choice", s => { + for p in ps { + let r = (p.run)(s) + if r.ok { return r } + } + err("no alternative matched", s) + }) +} + +// Optional +#let optional(p) = parser("optional", s => { + let r = (p.run)(s) + if r.ok { + ok(r.value, r.state) + } else { + ok(none, s) + } +}) + +// Optional with default value +#let optional-default(p, default) = map( + optional(p), + v => if v != none { v } else { default } +) + +// Zero or more +#let many(p) = parser("many", s => { + let results = () + let current = s + + while true { + let r = (p.run)(current) + if not r.ok { break } + 
results.push(r.value) + current = r.state + } + + ok(results, current) +}) + +// One or more +#let some(p) = parser("some", s => { + let first = (p.run)(s) + if not first.ok { return first } + + let rest = (many(p).run)(first.state) + ok((first.value,) + rest.value, rest.state) +}) + +// Between delimiters +#let between(left, right, p) = parser("between", s => { + let l = (left.run)(s) + if not l.ok { return l } + + let m = (p.run)(l.state) + if not m.ok { return m } + + let r = (right.run)(m.state) + if not r.ok { return r } + + ok(m.value, r.state) +}) + +// Separated by +#let sep-by(p, separator) = parser("sep-by", s => { + let first = (p.run)(s) + if not first.ok { return ok((), s) } + + let results = (first.value,) + let current = first.state + + while true { + let sep = (separator.run)(current) + if not sep.ok { break } + + let item = (p.run)(sep.state) + if not item.ok { break } + + results.push(item.value) + current = item.state + } + + ok(results, current) +}) + +// Separated by (at least one) +#let sep-by1(p, separator) = parser("sep-by1", s => { + let first = (p.run)(s) + if not first.ok { return first } + + let rest = (sep-by(p, separator).run)(first.state) + if rest.value.len() == 0 { + ok((first.value,), first.state) + } else { + ok((first.value,) + rest.value, rest.state) + } +}) + +// Count exact +#let count(n, p) = parser("count", s => { + let results = () + let current = s + + for i in range(n) { + let r = (p.run)(current) + if not r.ok { return err("expected " + str(n) + " items, got " + str(i), current) } + results.push(r.value) + current = r.state + } + + ok(results, current) +}) + +// Lookahead - check without consuming +#let lookahead(p) = parser("lookahead", s => { + let r = (p.run)(s) + if r.ok { + ok(r.value, s) // Don't advance + } else { + r + } +}) + +// Negative lookahead +#let not-ahead(p) = parser("not", s => { + let r = (p.run)(s) + if r.ok { + err("unexpected " + repr(r.value), s) + } else { + ok(none, s) + } +}) + +// Attempt - 
backtrack on failure +#let attempt(p) = parser("attempt", s => { + (p.run)(s) +}) + +// Label for better errors +#let label(p, lbl) = parser(lbl, s => { + let r = (p.run)(s) + if not r.ok { + err(lbl + " failed: " + r.error, s) + } else { + r + } +}) + +// Chain left - for left-associative operators +#let chainl(p, op, default: none) = parser("chainl", s => { + let first = (p.run)(s) + if not first.ok { + if default != none { + return ok(default, s) + } + return first + } + + let acc = first.value + let current = first.state + + while true { + let o = (op.run)(current) + if not o.ok { break } + + let next = (p.run)(o.state) + if not next.ok { break } + + acc = (o.value)(acc, next.value) + current = next.state + } + + ok(acc, current) +}) + +// Lazy parser - defers evaluation until needed +#let lazy(thunk) = parser("lazy", s => { + let p = thunk() + (p.run)(s) +}) + + +// Run parser +#let parse(p, input) = { + let s = state(input) + let r = (p.run)(s) + ( + success: r.ok, + value: if r.ok { r.value } else { none }, + error: if not r.ok { r.error } else { none }, + rest: (r.state.remaining)(r.state), + ) +} From 06db4a3717dae6c864d035b46bc730db98a47e23 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 01:42:03 +0900 Subject: [PATCH 12/30] feat: update tranformer to follow parser --- src/elements/molecule/molecule.typ | 33 ++- src/elements/molecule/parser.typ | 16 +- src/elements/molecule/transformer.typ | 360 +++++--------------------- 3 files changed, 105 insertions(+), 304 deletions(-) diff --git a/src/elements/molecule/molecule.typ b/src/elements/molecule/molecule.typ index 7fc463d..bda4837 100644 --- a/src/elements/molecule/molecule.typ +++ b/src/elements/molecule/molecule.typ @@ -1,10 +1,31 @@ -#import "parser.typ": parse-molecule +#import "parser.typ": alchemist-parser +#import "iupac-angle.typ": calculate_angles #import "transformer.typ": transform #let molecule(content, name: none, ..args) = { - let 
graph = parse-molecule(content) - - let elements = transform(graph) - - elements + let parsed = alchemist-parser(content) + if not parsed.success { + panic([ + Failed to parse #content reaction: #parsed.error + #repr(parsed) + ]) + } + + let reaction = parsed.value + reaction.terms.map(term => { + if term.type == "term" { + let molecule = term.molecule + let molecule_with_angles = calculate_angles(molecule) + transform(molecule_with_angles) + } else if term.type == "operator" { + let op = term.symbol + (( + type: "operator", + symbol: eval("$" + op + "$"), + margin: 0em, + ),) + } else { + panic("Unknown term type: " + term.type) + } + }).join() } diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index cac4163..dbc0b7a 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -420,6 +420,11 @@ ), parts => { let (_, digits, mol, lbl, opts) = parts + if type(mol) == array { + let (_, mol, _) = mol + } else { + mol = none + } ( type: "cycle", faces: int(digits.join()), @@ -484,7 +489,10 @@ ( type: "molecule", first: first, - rest: rest + rest: rest.map(unit => { + let (bond, unit) = unit + (bond: bond, unit: unit) + }) ) } ), @@ -564,15 +572,13 @@ parts => { let (first, rest) = parts let terms = (first,) - let edges = () for (operator, term) in rest { + terms.push(operator) terms.push(term) - edges.push((..operator, from: terms.len() - 1, to: terms.len())) } ( type: "reaction", - terms: terms, - edges: edges + terms: terms ) } ), diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 86dff56..87231f1 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,329 +1,103 @@ +// Simple transformer for new parser structure with angles already calculated #import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left -#import "iupac-angle.typ": calculate-all-angles, 
calculate-ring-rotation -// Inline branch function to avoid circular reference -#let create-branch(body, args: (:)) = { - (( - type: "branch", - body: body, - args: args - ),) -} - -// Inline cycle function to avoid circular reference -#let create-cycle(faces, body, args: (:)) = { - (( - type: "cycle", - faces: faces, - body: body, - args: args - ),) -} - -#let create-fragment(mol, name: none, links: (:), lewis: (), vertical: false, colors: none) = { - let atom-count = if mol == none or mol == "" { - 1 - } else if type(mol) == str { - mol.len() - } else if type(mol) == content { - 1 - } else { - 1 - } - +// Create fragment element +#let transform_fragment(node) = { + let atoms = node.name ( - ( - type: "fragment", - name: name, - atoms: if type(mol) == array { mol } else { (mol,) }, - colors: colors, - links: links, - lewis: lewis, - vertical: vertical, - count: atom-count, - ), + type: "fragment", + atoms: if type(atoms) == array { atoms } else { (atoms,) }, + name: none, + links: (:), + lewis: (), + vertical: false, + count: if type(atoms) == array { atoms.len() } else { 1 }, + colors: none, ) } -#let process-atom(atom) = { - if atom == none or atom == "" { return () } - - // If atom is already content type, return as is - if type(atom) == content { - return (atom,) - } - - if type(atom) == str { - // Pattern with function name followed by parentheses - let func-pattern = regex("^[a-z-]+\(.*\)$") - if atom.match(func-pattern) != none { - return (eval(atom, mode: "markup"),) - } - - // Split each element separately like CH3 -> [$C$, $H_3$] - let elements = () - let i = 0 - let chars = atom.clusters() - - while i < chars.len() { - let char = chars.at(i) - - if char.match(regex("^[A-Z]$")) != none { - let element = char - - if i + 1 < chars.len() and chars.at(i + 1).match(regex("^[a-z]$")) != none { - element += " " + chars.at(i + 1) - i += 1 - } - - let numbers = "" - while i + 1 < chars.len() and chars.at(i + 1).match(regex("^[0-9]$")) != none { - numbers += 
chars.at(i + 1) - i += 1 - } - if numbers != "" { - element += "_" + numbers - } - - elements.push(eval("$" + element + "$", mode: "markup")) - } else { - elements.push(eval("$" + char + "$", mode: "markup")) - } - - i += 1 - } - return elements - } - return (atom,) -} - -// Get atom priority and find connection point -#let get-atom-connection-point(fragment, from-end: false) = { - let position = 0 - for atom in fragment { - if atom == "C" or atom == "N" or atom == "O" { - return position - } - position += 1 - } - - return 0 -} - -// Determine bond connection points based on atom priority (for rings) or angle (for non-rings) -#let get-bond-connection-points(angle, from-atom, to-atom, in-ring) = { - if angle == none { - return (from: none, to: none) - } - - let from-point = none - let to-point = none - - // Outside ring: use angle to determine connection direction - while angle > 180deg { angle -= 360deg } - while angle < -180deg { angle += 360deg } - - if in-ring or angle == 90deg or angle == -90deg { - // Inside ring: use atom priority to determine connection points - let from-pos = get-atom-connection-point(from-atom) - let to-pos = get-atom-connection-point(to-atom) - - // Use the position from the connection point info - from-point = from-pos - to-point = to-pos - } else if angle > -90deg and angle < 90deg { - // Left to right connection - from-point = calc.max(0, from-atom.len() - 1) - to-point = 0 - } else { - // Right to left connection - from-point = 0 - to-point = calc.max(0, to-atom.len() - 1) - } - - return (from: from-point, to: to-point) -} +// Create bond element based on symbol and angle +#let transform_bond(bond) = { + let symbol = bond.symbol + let absolute = bond.at("absolute", default: none) + let relative = bond.at("relative", default: none) -#let get-bond-with-angle(bond-type, angle: none, from-atom: none, to-atom: none, in-ring: false) = { - let bond = if bond-type == "double" { + let bond-fn = if symbol == "-" { + single + } else if symbol == 
"=" { double - } else if bond-type == "triple" { + } else if symbol == "#" { triple - } else if bond-type == "wedge-filled-right" { + } else if symbol == ">" { cram-filled-right - } else if bond-type == "wedge-filled-left" { + } else if symbol == "<" { cram-filled-left - } else if bond-type == "wedge-dashed-right" { + } else if symbol == ":>" { cram-dashed-right - } else if bond-type == "wedge-dashed-left" { + } else if symbol == "<:" { cram-dashed-left } else { single } - // Calculate connection points and create bond with appropriate parameters - let bond-args = (:) - - // Add angle if specified (now absolute angle) - if angle != none { - bond-args.insert("absolute", angle) - } - - // Add connection points if atoms are specified - if from-atom != none and to-atom != none { - let connection-points = get-bond-connection-points( - angle, from-atom, to-atom, in-ring - ) - - // Add connection points to bond arguments - if connection-points.from != none { - bond-args.insert("from", connection-points.from) - } - if connection-points.to != none { - bond-args.insert("to", connection-points.to) - } - } - - // Return bond with all calculated parameters - if bond-args.len() > 0 { - bond(..bond-args) - } else { - bond() - } + bond-fn(absolute: absolute, relative: relative) } -#let build-molecule-structure(graph, node-id, visited, angles, in-ring: false) = { - if node-id in visited { return () } - visited.push(node-id) - +#let transform_branch(branch, transform) = { let elements = () - let node = graph.nodes.at(node-id) - - if node.type == "fragment" { - let atom-raw = node.data.atom - let atom-content = process-atom(atom-raw) - let count = if type(atom-content) == array { atom-content.len() } else { 1 } - - elements += ( - ( - type: "fragment", - name: none, - atoms: atom-content, - colors: none, - links: (:), - lewis: (), - vertical: false, - count: count, - ), - ) - } else if node.type == "implicit" { - } else if node.type == "ring" { - let ring-size = node.data.size - let 
ring-content = node.data.at("content", default: none) - - let ring-rotation = calculate-ring-rotation(node-id, graph, angles) - - let cycle-body = () - - // If ring has content, expand it - if ring-content != none and ring-content != (:) and ring-content.at("root", default: none) != none { - let ring-angles = calculate-all-angles(ring-content, is-ring: true) - let ring-visited = () - let ring-elements = build-molecule-structure(ring-content, ring-content.root, ring-visited, ring-angles, in-ring: true) - - // Move leading and trailing fragments - let number = 0 - for i in range(ring-elements.len()) { - let element = ring-elements.at(i) - let type = element.at("type", default: none) - - if type == "fragment" and (number == 0 or number == ring-size) { - elements += (element, ) - } else { - if number == ring-size { panic("bonds is too many for ring size: " + str(ring-size)) } - if type == "link" { number += 1 } - if type == "branch" { - let _ = element.at("body").at(0).remove("absolute") - } - cycle-body += (element,) - } - } - } else { - for i in range(ring-size) { - cycle-body += single() - } - } - - visited.push(node-id) - - elements += create-cycle(ring-size, cycle-body, args: (relative: ring-rotation)) - } + elements += transform_bond(branch.bond) + elements += transform(branch.body) - // Process all edges from current node - for edge in graph.edges { - if edge.from != node-id or edge.to in visited { continue } - - let role = edge.data.at("role", default: "main") - let edge-key = str(edge.from) + "->" + str(edge.to) - let angle = angles.at(edge-key, default: none) - - // Get lengths of connecting atoms - let from-atom = if node.type == "fragment" { process-atom(node.data.atom) } else { "" } - let to-node = graph.nodes.at(edge.to) - let to-atom = if to-node.type == "fragment" { process-atom(to-node.data.atom) } else { "" } + return ( + type: "branch", + body: elements, + args: (:), + ) +} - if role == "branch" { - in-ring = false - } - - let bond = 
get-bond-with-angle( - edge.data.at("bondType", default: "single"), - angle: angle, - from-atom: from-atom, - to-atom: to-atom, - in-ring: in-ring - ) - - let next-elements = build-molecule-structure(graph, edge.to, visited, angles, in-ring: in-ring) +#let transform_cycle(cycle, transform) = { + return ( + type: "cycle", + faces: cycle.faces, + body: if cycle.body != none { transform(cycle.body) } else { none }, + args: (:), + ) +} - // Different handling for main vs branch - if role == "branch" { - let branch-body = bond + next-elements - elements += create-branch(branch-body) +// Transform a single unit (node + branches) +#let transform_unit(unit, transform) = { + let elements = () + + // Add node content + if unit.node != none { + if unit.node.type == "fragment" { + elements.push(transform_fragment(unit.node)) + } else if unit.node.type == "cycle" { + elements.push(transform_cycle(unit.node, transform)) + } else if unit.node.type == "implicit" { } else { - // Main edge - elements += bond - elements += next-elements + panic("Unknown node type: " + unit.node.type) } } + // Add branches + elements += unit.branches.map(branch => transform_branch(branch, transform)) + return elements } -#let transform(graph) = { - let root = graph.at("root", default: none) - if root == none and graph.nodes.len() > 0 { - for (id, _) in graph.nodes { - if id == "node_0" { - root = id - break - } - } - if root == none { - root = graph.nodes.keys().first() - } - } - - if root == none { +// Main transformation function +#let transform(molecule) = { + if molecule == none or molecule.type != "molecule" { return () } - let angles = calculate-all-angles(graph) - let elements = build-molecule-structure(graph, root, (), angles, in-ring: false) + let elements = () + elements += transform_unit(molecule.first, transform) + for item in molecule.rest { + elements += transform_bond(item.bond) + elements += transform_unit(item.unit, transform) + } return elements } - -#let transform-molecule(graph) = 
{ - transform(graph) -} \ No newline at end of file From 223ada6c4805e0fd95f3c7f763d385573d177786 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 01:42:31 +0900 Subject: [PATCH 13/30] refactor: update angle calculation to follow parser --- src/elements/molecule/iupac-angle.typ | 475 +++++++------------------- 1 file changed, 131 insertions(+), 344 deletions(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index aeea935..0ce40a7 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -1,375 +1,162 @@ -// IUPAC-compliant molecular structure angle calculation module -// Assigns appropriate angles based on vertex bonding states according to IUPAC rules - #import "@preview/cetz:0.2.2" -// Basic angle definitions (relative angles) -#let ANGLE-STRAIGHT = 0deg // Straight line -#let ANGLE-REVERSE = 180deg // Reverse -#let ANGLE-UP = 60deg // Upward (for zigzag pattern) -#let ANGLE-DOWN = -60deg // Downward (for zigzag pattern) -#let MAIN-CHAIN-INITIAL-ANGLE = 30deg // Main chain initial angle -#let BRANCH-ANGLE-STEP = 60deg // Angle step between branches -#let FULL-CIRCLE = 360deg // Full circle angle +// IUPAC angles +#let IUPAC_ANGLES = ( + main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg }, + zigzag_up: 60deg, + zigzag_down: -60deg, + incoming: -180deg, + + sp3: (60deg, -60deg, -120deg, -180deg), + sp2: (60deg, -60deg, -180deg), + sp: (0deg, -180deg), + angles: n => range(n).map(i => 180deg - 360deg / n * (i + 1)), -// Determine up/down for zigzag pattern -#let get-zigzag-angle(index) = { - if calc.rem(index, 2) == 0 { ANGLE-UP } else { ANGLE-DOWN } -} + ring: n => range(n).map(i => 180deg - 360deg / n * (i + 1)), +) -// Determine node hybridization state -#let determine-hybridization(node-id, graph) = { - // Use cache to avoid duplicate calculations - let node-edges = graph.edges.filter(e => 
e.from == node-id or e.to == node-id) - - let double-bond-count = 0 - for edge in node-edges { - // Triple bond indicates sp hybridization - if edge.data != none and edge.data.bondType == "triple" { - return "sp" - } - // Count double bonds - if edge.data != none and edge.data.bondType == "double" { - double-bond-count += 1 - } - } - - // Determine hybridization state - return if double-bond-count == 2 { "sp" } - else if double-bond-count == 1 { "sp2" } +#let get_hybridization(bonds) = { + let types = bonds.map(b => b.at("symbol", default: "-")) + if "#" in types or types.filter(t => t == "#").len() >= 2 { "sp" } + else if "=" in types { "sp2" } else { "sp3" } } -// Calculate main chain angle (relative angle) -// Zigzag pattern: up→down→up... -#let calculate-main-chain-angle(edge-index, hybridization, is-first-edge, main-chain-length) = { - // sp hybridization means straight line - if hybridization == "sp" { - return ANGLE-STRAIGHT - } - - // Process first edge - if is-first-edge { - if main-chain-length >= 2 { - return MAIN-CHAIN-INITIAL-ANGLE - } else { - return ANGLE-STRAIGHT - } - } - - // Zigzag pattern (based on edge index) - return get-zigzag-angle(edge-index) -} - -// Calculate branch angles (relative angles) -// Equally spaced in appropriate range based on situation -#let calculate-branch-angles(node-id, graph, edge-index, is-root-node, main-chain-length, branch-edges, has-incoming-main, has-outgoing-main, is-in-branch: false) = { - let branch-angles = () - let total-branches = branch-edges.len() + if is-in-branch { 1 } else { 0 } - - if total-branches == 0 { return branch-angles } +#let calc_unit_angles(unit, prev_bond, next_bond, current_angle) = { + // Count all bonds + let n = unit.branches.len() + if prev_bond != none { 1 } else { 0 } + if next_bond != none { 1 } else { 0 } - // Receive connection status from upstream - let has-incoming = has-incoming-main - let has-outgoing = has-outgoing-main + let bonds = () + if prev_bond != none { 
bonds.push(prev_bond) } + if next_bond != none { bonds.push(next_bond) } - // Determine split range and direction (simplified) - let center-angle = if not has-outgoing { - ANGLE-STRAIGHT - } else if not has-incoming { - ANGLE-REVERSE - } else { - // Reverse direction of zigzag pattern - - get-zigzag-angle(edge-index) - } + let hyb = get_hybridization(bonds) + let angles = if hyb == "sp3" and n == 4 { IUPAC_ANGLES.sp3 } + else if hyb == "sp2" and n == 3 { IUPAC_ANGLES.sp2 } + else if hyb == "sp" and n == 2 { IUPAC_ANGLES.sp } + else { (IUPAC_ANGLES.angles)(n) } - // Case of single branch - if total-branches == 1 { - branch-angles.push(center-angle) - return branch-angles - } + // Calculate branch angles + let branch_angles = () + let angle_idx = 0 - // Calculate angle step for multiple branches - let step-angle = if has-incoming and has-outgoing { - BRANCH-ANGLE-STEP / (total-branches - 1) - } else { - let n = total-branches + if has-incoming { 1 } + if has-outgoing { 1 } - FULL-CIRCLE / n - } + if prev_bond != none { angle_idx += 1 } // Skip incoming angle + if next_bond != none { angle_idx += 1 } // Skip outgoing angle - // Place branch angles at equal intervals - let start-angle = center-angle - step-angle * (total-branches - 1) / 2 - for i in range(total-branches) { - branch-angles.push(start-angle + i * step-angle) + for _ in unit.branches { + branch_angles.push(current_angle + angles.at(calc.rem(angle_idx, angles.len()))) + angle_idx += 1 } - return branch-angles + return branch_angles } -// Calculate angles for all edges from node -#let calculate-edge-angles(node-id, graph, edge-index, is-root-node, main-chain-length, node-info: none, is-in-branch: false) = { - // Get edges and hybridization from node info (calculate defaults if not present) - let edges = node-info.at("edges", default: graph.edges.filter(e => e.from == node-id)) - - if edges.len() == 0 { - return (:) - } - - let hybridization = node-info.at("hybridization", default: 
determine-hybridization(node-id, graph)) - - // Classify edges by role (efficiently process in single loop) - let main-edges = () - let branch-edges = () - - // Classify edges and check connection status simultaneously - let has-incoming-main = false - let has-outgoing-main = false - - for edge in edges { - let role = edge.data.at("role", default: "main") - if role == "branch" { - branch-edges.push(edge) - } else { - main-edges.push(edge) - has-outgoing-main = true - } - } - - // Check incoming edges (only needed when has-outgoing is true) - if branch-edges.len() > 0 { - for edge in graph.edges { - if edge.to == node-id and edge.data.at("role", default: "main") == "main" { - has-incoming-main = true - break - } - } - } - - // Dictionary to store angles - let angles = (:) - for (index, edge) in main-edges.enumerate() { - let is-first-edge = is-root-node and index == 0 - let angle = calculate-main-chain-angle(edge-index + index, hybridization, is-first-edge, main-chain-length) - let key = str(edge.from) + "->" + str(edge.to) - angles.insert(key, angle) - } - - // Calculate branch edge angles - if branch-edges.len() > 0 { - let branch-angles-list = calculate-branch-angles( - node-id, graph, edge-index, is-root-node, main-chain-length, - branch-edges, has-incoming-main, has-outgoing-main, is-in-branch: is-in-branch - ) - for (index, edge) in branch-edges.enumerate() { - let angle = branch-angles-list.at(index) - let key = str(edge.from) + "->" + str(edge.to) - angles.insert(key, angle) - } - } - - return angles -} - -// Calculate main chain length -#let count-main-chain-edges(graph, start-node) = { - let visited = () - let count = 0 - let current-node = start-node - - while current-node != none { - if current-node in visited { break } - visited.push(current-node) +// Main angle calculation +#let calculate_angles(molecule) = { + if molecule == none or molecule.type != "molecule" { return molecule } + + let chain_length = if molecule.rest != none { molecule.rest.len() } 
else { 0 } + let current_angle = (IUPAC_ANGLES.main_chain_initial)(chain_length) + + // Create new first unit with angles + let new_first = molecule.first + if molecule.first != none { + let unit = molecule.first + let next_bond = if molecule.rest != none and molecule.rest.len() > 0 { + molecule.rest.at(0).bond + } else { none } - let main-edges = graph.edges.filter(e => - e.from == current-node and - e.data.at("role", default: "main") == "main" - ) + let branch_angles = calc_unit_angles(unit, none, next_bond, current_angle) - if main-edges.len() > 0 { - count += 1 - current-node = main-edges.at(0).to - } else { - break - } - } - - return count -} - -// Traverse graph and calculate angles -#let traverse-and-calculate(graph, node-id, visited, edge-index, angles, is-root, node-cache: none, is-in-branch: false) = { - if node-id in visited { return (angles, edge-index) } - visited.push(node-id) - - let main-chain-length = if is-root { - count-main-chain-edges(graph, node-id) - } else { - 0 - } - - // Get information from cache (empty dictionary if not present) - let node-info = if node-cache != none { node-cache.at(node-id, default: (:)) } else { (:) } - - let node-angles = calculate-edge-angles( - node-id, graph, edge-index, is-root, main-chain-length, - node-info: node-info, is-in-branch: is-in-branch - ) - - // Merge angles (more efficient merging) - angles = angles + node-angles - - // Get edges (from node-info, or filter if not present) - let edges = node-info.at("edges", default: graph.edges.filter(e => e.from == node-id)) - - let next-edge-index = edge-index - - for edge in edges { - if edge.to not in visited { - let role = edge.data.at("role", default: "main") - if role == "main" { - // For main chain, advance the index - next-edge-index = next-edge-index + 1 - let (new-angles, new-index) = traverse-and-calculate(graph, edge.to, visited, next-edge-index, angles, false, node-cache: node-cache, is-in-branch: is-in-branch) - angles = new-angles - next-edge-index = 
new-index - } else if role == "branch" { - let (new-angles, _) = traverse-and-calculate(graph, edge.to, visited, 0, angles, false, node-cache: node-cache, is-in-branch: true) - angles = new-angles + // Create new branches with angles + let new_branches = () + if unit.branches != none { + for (b_idx, branch) in unit.branches.enumerate() { + if b_idx < branch_angles.len() { + // Create new bond with angle + let new_bond = if branch.bond != none { + branch.bond + (relative: branch_angles.at(b_idx)) + } else { branch.bond } + + // Recursively calculate angles for branch body + let new_body = if branch.at("body", default: none) != none { + calculate_angles(branch.body) + } else { branch.at("body", default: none) } + + // Create new branch with updated bond and body + let new_branch = ( + type: branch.type, + bond: new_bond, + body: new_body + ) + new_branches.push(new_branch) + } else { + new_branches.push(branch) + } } } + new_first = unit + (branches: new_branches) } - return (angles, next-edge-index) -} - -// Convert relative angles to absolute angles -#let relative-to-absolute(angles, graph) = { - let absolute-angles = (:) - let node-absolute-angles = (:) // Track cumulative angle for each node - - // Start from root with 0deg absolute angle - let root = graph.at("root", default: none) - if root == none and graph.nodes.len() > 0 { - root = if "node_0" in graph.nodes { "node_0" } else { graph.nodes.keys().first() } - } - - if root != none { - node-absolute-angles.insert(root, 0deg) - } - - // Convert each relative angle to absolute - for (edge-key, relative-angle) in angles { - // Parse edge key to get from and to nodes - let parts = edge-key.split("->") - if parts.len() == 2 { - let from-node = parts.at(0) - let to-node = parts.at(1) + // Process rest and create new rest array + let new_rest = () + if molecule.rest != none { + for (r_idx, item) in molecule.rest.enumerate() { + current_angle += if calc.rem(r_idx, 2) == 0 { + IUPAC_ANGLES.zigzag_up + } else { + 
IUPAC_ANGLES.zigzag_down + } - // Get the absolute angle of the from node (default to 0deg) - let from-absolute = node-absolute-angles.at(from-node, default: 0deg) + // Create new bond with absolute angle for main chain + let new_bond = item.bond + (absolute: current_angle) - // Calculate absolute angle by adding relative to from node's absolute - let absolute-angle = from-absolute + relative-angle + let unit = item.unit + let prev_bond = new_bond + let next_bond = if r_idx + 1 < molecule.rest.len() { + molecule.rest.at(r_idx + 1).bond + } else { none } - // Store for this edge - absolute-angles.insert(edge-key, absolute-angle) + let branch_angles = calc_unit_angles(unit, prev_bond, next_bond, current_angle) - // Update the to-node's absolute angle for next calculations - node-absolute-angles.insert(to-node, absolute-angle) - } - } - - return absolute-angles -} - -// ===== Main Functions ===== - -// Calculate relative angles for entire graph -#let calculate-all-relative-angles(graph, is-ring: false) = { - // Efficient search for root node - let root = graph.at("root", default: none) - if root == none and graph.nodes.len() > 0 { - // Prioritize searching for node_0 - root = if "node_0" in graph.nodes { "node_0" } else { graph.nodes.keys().first() } - } - - if root == none { return (:) } - - // Pre-build node info cache (performance optimization for large graphs) - let node-cache = (:) - if graph.edges.len() > 50 { // Use cache only for large graphs - for edge in graph.edges { - let from-node = edge.from - if from-node not in node-cache { - node-cache.insert(from-node, (edges: (), hybridization: none)) + // Create new unit with branch angles + let new_unit = unit + if unit != none and unit.branches != none { + let new_branches = () + for (b_idx, branch) in unit.branches.enumerate() { + if b_idx < branch_angles.len() { + // Create new bond with angle + let new_bond = if branch.bond != none { + branch.bond + (relative: branch_angles.at(b_idx)) + } else { branch.bond } 
+ + // Recursively calculate angles for branch body + let new_body = if branch.at("body", default: none) != none { + calculate_angles(branch.body) + } else { branch.at("body", default: none) } + + // Create new branch with updated bond and body + let new_branch = ( + type: branch.type, + bond: new_bond, + body: new_body + ) + new_branches.push(new_branch) + } else { + new_branches.push(branch) + } + } + new_unit = unit + (branches: new_branches) } - node-cache.at(from-node).edges.push(edge) - } - // Pre-calculate hybridization state for each node - for (node-id, _) in graph.nodes { - // Create entries for all nodes when building node-cache - if node-id not in node-cache { - node-cache.insert(node-id, (edges: (), hybridization: none)) - } - node-cache.at(node-id).hybridization = determine-hybridization(node-id, graph) + + new_rest.push((bond: new_bond, unit: new_unit)) } } - let visited = () - let angles = (:) - let (final-angles, _) = traverse-and-calculate( - graph, root, visited, 0, angles, true, - node-cache: if node-cache.len() > 0 { node-cache } else { none }, - is-in-branch: false - ) - - return final-angles -} - -// Calculate absolute angles for entire graph -#let calculate-all-angles(graph, is-ring: false) = { - let relative-angles = calculate-all-relative-angles(graph, is-ring: is-ring) - - return relative-to-absolute(relative-angles, graph) -} - -// Calculate ring rotation angle (using relative angles only) -#let calculate-ring-rotation(node-id, graph, angles) = { - let outgoing-edges = graph.edges.filter(e => - e.from == node-id and e.data.at("role", default: "main") == "main" - ) - let incoming-edges = graph.edges.filter(e => - e.to == node-id and e.data.at("role", default: "main") == "main" + // Return new molecule with angles + return ( + type: "molecule", + first: new_first, + rest: new_rest ) - - let node = graph.nodes.at(node-id) - let ring-size = node.data.size - let has-incoming = incoming-edges.len() > 0 - let has-outgoing = outgoing-edges.len() 
> 0 - let base-adjustment = 180deg / ring-size - 90deg - - if has-incoming and not has-outgoing { - return ANGLE-STRAIGHT + base-adjustment - } - - let outgoing-angle = 0deg - for edge in outgoing-edges { - if edge.from == node-id { - let edge-key = str(edge.from) + "->" + str(edge.to) - outgoing-angle = angles.at(edge-key, default: 0deg) - break - } - } - - if not has-incoming and has-outgoing { - return ANGLE-REVERSE + outgoing-angle + base-adjustment - } - - if has-incoming and has-outgoing { - // Determine ring orientation (reverse of zigzag pattern) - let inverse-angle = if outgoing-angle == ANGLE-UP { ANGLE-DOWN } else { ANGLE-UP } - return inverse-angle + base-adjustment - } - - return base-adjustment -} +} \ No newline at end of file From 5929af5862b23e0f38b5d74e72fbd8789164fe9e Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 03:21:21 +0900 Subject: [PATCH 14/30] refactor: Improve clarity and remove redundant comments --- src/elements/molecule/iupac-angle.typ | 2 +- src/elements/molecule/molecule.typ | 18 +- src/elements/molecule/parser.typ | 245 ++++++++++---------------- src/elements/molecule/transformer.typ | 56 +++--- 4 files changed, 130 insertions(+), 191 deletions(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index 0ce40a7..ee59f17 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -1,6 +1,6 @@ #import "@preview/cetz:0.2.2" -// IUPAC angles +// relative angles #let IUPAC_ANGLES = ( main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg }, zigzag_up: 60deg, diff --git a/src/elements/molecule/molecule.typ b/src/elements/molecule/molecule.typ index bda4837..41b7029 100644 --- a/src/elements/molecule/molecule.typ +++ b/src/elements/molecule/molecule.typ @@ -1,5 +1,4 @@ #import "parser.typ": alchemist-parser -#import "iupac-angle.typ": calculate_angles #import "transformer.typ": 
transform #let molecule(content, name: none, ..args) = { @@ -12,20 +11,5 @@ } let reaction = parsed.value - reaction.terms.map(term => { - if term.type == "term" { - let molecule = term.molecule - let molecule_with_angles = calculate_angles(molecule) - transform(molecule_with_angles) - } else if term.type == "operator" { - let op = term.symbol - (( - type: "operator", - symbol: eval("$" + op + "$"), - margin: 0em, - ),) - } else { - panic("Unknown term type: " + term.type) - } - }).join() + transform(reaction) } diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index dbc0b7a..d42cea6 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -8,7 +8,7 @@ // operator expression OPERATOR ::= CONDITION? OP_SYMBOL CONDITION? CONDITION ::= "[" TEXT "]" - OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" | MATH_TEXT + OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" | MATH_TEXT // TODO: Unicode is difficult to parse // molecule syntax molecule ::= unit (bond unit)* @@ -51,13 +51,28 @@ // ==================== Utilities ==================== -#let digit = satisfy(c => c >= "0" and c <= "9", name: "digit") -#let letter = satisfy(c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), name: "letter") -#let uppercase = satisfy(c => c >= "A" and c <= "Z", name: "uppercase") -#let lowercase = satisfy(c => c >= "a" and c <= "z", name: "lowercase") -#let alphanum = satisfy(c => { - (c >= "0" and c <= "9") or (c >= "a" and c <= "z") or (c >= "A" and c <= "Z") -}, name: "alphanum") +#let digit = satisfy( + c => c >= "0" and c <= "9", name: "digit" +) +#let letter = satisfy( + c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), name: "letter" +) +#let uppercase = satisfy( + c => c >= "A" and c <= "Z", name: "uppercase" +) +#let lowercase = satisfy( + c => c >= "a" and c <= "z", name: "lowercase" +) +#let alphanum = satisfy( + c => (c >= "0" and c <= "9") or (c >= "a" and c <= "z") or (c 
>= "A" and c <= "Z"), + name: "alphanum" +) +#let identifier = { + map(seq(choice(letter, char("_")), many(choice(alphanum, char("_")))), r => { + let (first, rest) = r + first + rest.join() + }) +} #let whitespace = one-of(" \t\n\r") #let ws = many(whitespace) #let space = one-of(" \t") @@ -65,29 +80,6 @@ #let lexeme(p) = map(seq(p, ws), r => r.at(0)) #let token(s) = lexeme(str(s)) -// Integer -#let integer = { - let sign = optional(one-of("+-")) - let digits = some(digit) - - map(seq(sign, digits), r => { - let (s, d) = r - let n = int(d.join()) - if s == "-" { -n } else { n } - }) -} - -// Identifier -#let identifier = { - let first = choice(letter, char("_")) - let rest = many(choice(alphanum, char("_"))) - - map(seq(first, rest), r => { - let (f, rs) = r - f + rs.join() - }) -} - // String with escapes #let string-lit(quote: "\"") = { let escape = map(seq(char("\\"), any()), r => { @@ -106,15 +98,56 @@ map(between(char(quote), char(quote), many(char-parser)), chars => chars.join()) } -// ==================== Fragment Components ==================== +// ==================== Labels and Options ==================== + +#let label-parser = map( + seq(char(":"), identifier), + parts => { + let (_, id) = parts + id.value + } +) + +#let bond-label-parser = map( + seq(str("::"), identifier), + parts => { + let (_, id) = parts + id.value + } +) + +#let value-parser = choice( + map(some(digit), ds => int(ds.join())), + identifier +) + +#let key-value-pair-parser = label( + map( + seq(identifier, token(":"), value-parser), + parts => { + let (key, _, value) = parts + (key: key, value: value) + } + ), + "key-value pair (e.g., color: red, angle: 45)" +) + +#let options-parser = label( + map( + seq(char("("), sep-by(key-value-pair-parser, token(",")), char(")")), + parts => { + let (_, pairs, _) = parts + (type: "options", pairs: pairs) + } + ), + "options in parentheses" +) + +// ==================== Fragments ==================== -// ELEMENT ::= [A-Z][a-z]? 
#let element-parser = label( map( - seq( - uppercase, - optional(lowercase), - ), + seq(uppercase, optional(lowercase)), parts => { let (upper, lower) = parts if lower != none { upper + lower } else { upper } @@ -123,16 +156,14 @@ "element symbol (e.g., H, Ca, Fe)" ) -// SUBSCRIPT ::= DIGIT+ #let subscript-parser = label( map( some(digit), digits => int(digits.join()) ), - "subscript number" + "subscript number (e.g., CH4, O2)" ) -// ISOTOPE ::= "^" DIGIT+ #let isotope-parser = label( map( seq(char("^"), some(digit)), @@ -144,7 +175,6 @@ "isotope notation (e.g., ^14, ^235)" ) -// CHARGE ::= "^" DIGIT? ("+" | "-") #let charge-parser = label( map( seq(char("^"), optional(digit), choice(char("+"), char("-"))), @@ -156,7 +186,6 @@ "charge notation (e.g., ^+, ^2-, ^3+)" ) -// ELEMENT_GROUP ::= ISOTOPE? ELEMENT SUBSCRIPT? #let element-group-parser = map( seq(optional(isotope-parser), element-parser, optional(subscript-parser)), parts => { @@ -170,16 +199,17 @@ } ) -// ABBREVIATION ::= [a-z][A-Za-z]+ -#let abbreviation-parser = map( - seq(lowercase, some(letter)), - parts => { - let (first, rest) = parts - (type: "abbreviation", value: first + rest.join()) - } +#let abbreviation-parser = label( + map( + seq(lowercase, some(letter)), + parts => { + let (first, rest) = parts + (type: "abbreviation", value: first + rest.join()) + } + ), + "abbreviation (e.g., tBu, iPr)" ) -// MATH_TEXT ::= "$" [^$]+ "$" #let math-text-parser = label( map( seq( @@ -200,7 +230,7 @@ map( seq( char("("), - lazy(() => atoms-parser()), + atoms-parser, char(")"), optional(subscript-parser) ), @@ -218,7 +248,7 @@ map( seq( char("["), - lazy(() => atoms-parser()), + atoms-parser, char("]") ), parts => { @@ -230,7 +260,6 @@ ) } -// Forward declarations for recursive parsers #let atoms-part-parser(atoms-parser) = choice( element-group-parser, parenthetical-parser(atoms-parser), @@ -238,9 +267,11 @@ ) #let atoms-parser() = { + let self = lazy(() => atoms-parser()) + label( map( - 
seq(some(atoms-part-parser(atoms-parser)), optional(charge-parser)), + seq(some(atoms-part-parser(self)), optional(charge-parser)), parts => { let (parts, charge) = parts (type: "atoms", parts: parts, charge: charge) @@ -250,69 +281,12 @@ ) } -// FRAGMENT ::= ATOMS | ABBREVIATION | MATH_TEXT #let fragment-content-parser = choice( atoms-parser(), abbreviation-parser, math-text-parser, - element-parser // Fallback for simple elements -) - -// IDENTIFIER ::= [a-zA-Z_][a-zA-Z0-9_]* -#let identifier-parser = map( - seq( - satisfy(c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z") or c == "_", name: "id-first"), - many(alphanum) - ), - parts => { - let (first, rest) = parts - (type: "identifier", value: first + rest.join()) - } -) - -// label ::= ":" IDENTIFIER -#let label-parser = map( - seq(char(":"), identifier-parser), - parts => { - let (_, id) = parts - (type: "label", name: id.value) - } -) - -// ==================== Options ==================== - -// Simple value parser -#let value-parser = choice( - map(some(digit), ds => int(ds.join())), - identifier-parser -) - -// key_value_pair ::= IDENTIFIER ":" value -#let key-value-pair-parser = label( - map( - seq(identifier-parser, token(":"), value-parser), - parts => { - let (key, _, value) = parts - (key: key.value, value: value) - } - ), - "key-value pair (e.g., color: red, angle: 45)" ) -// options ::= "(" key_value_pair ("," key_value_pair)* ")" -#let options-parser = label( - map( - seq(char("("), sep-by(key-value-pair-parser, token(",")), char(")")), - parts => { - let (_, pairs, _) = parts - (type: "options", pairs: pairs) - } - ), - "options in parentheses" -) - -// ==================== Fragment ==================== - #let process-atom(parts) = { let type = parts.type @@ -340,7 +314,6 @@ } } -// fragment ::= FRAGMENT label? options? 
#let fragment-parser = label( map( seq(fragment-content-parser, optional(label-parser), optional(options-parser)), @@ -359,7 +332,6 @@ // ==================== Bonds ==================== -// BOND_SYMBOL ::= "-" | "=" | "#" | ">" | "<" | ":>" | "<:" | "|>" | "<|" #let bond-symbol-parser = choice( str("->"), // Arrow prevention str("=>"), // Arrow prevention @@ -374,16 +346,6 @@ char("<") ) -// bond_label ::= "::" IDENTIFIER -#let bond-label-parser = map( - seq(str("::"), identifier-parser), - parts => { - let (_, id) = parts - (type: "bond-label", name: id.value) - } -) - -// bond ::= BOND_SYMBOL bond_label? options? #let bond-parser = label( map( seq(bond-symbol-parser, optional(bond-label-parser), optional(options-parser)), @@ -402,19 +364,11 @@ // ==================== Rings ==================== -// ring ::= "@" DIGIT+ "(" molecule? ")" label? options? #let ring-parser(mol-parser) = label( lazy(() => map( seq( - char("@"), - some(digit), - optional( - seq( - char("("), - mol-parser, - char(")"), - ), - ), + char("@"), some(digit), + optional(seq(char("("), mol-parser, char(")"))), optional(label-parser), optional(options-parser) ), @@ -439,28 +393,20 @@ // ==================== Molecules ==================== -// node ::= fragment | ring | label #let node-parser(mol-parser) = choice( fragment-parser, ring-parser(mol-parser), label-parser ) -// branch ::= "(" bond molecule ")" #let branch-parser(mol-parser) = map( - seq( - char("("), - bond-parser, - mol-parser, - char(")") - ), + seq(char("("), bond-parser, mol-parser, char(")")), parts => { let (_, bond, molecule, _) = parts (type: "branch", bond: bond, body: molecule) } ) -// unit ::= (node | implicit_node) branch* #let unit-parser(mol-parser) = map( seq(optional(node-parser(mol-parser)), many(branch-parser(mol-parser))), parts => { @@ -473,9 +419,7 @@ } ) -// molecule ::= unit (bond unit)* #let molecule-parser() = { - // Create a lazy reference to itself let self = lazy(() => molecule-parser()) label( @@ -502,7 
+446,6 @@ // ==================== Reactions ==================== -// COEFFICIENT ::= DIGIT+ #let coefficient-parser = label( map( some(digit), @@ -511,7 +454,6 @@ "stoichiometric coefficient" ) -// OP_SYMBOL ::= "->" | "<=>" | "⇌" | "→" | "⇄" | "=>" | "-->" | "+" #let op-symbol-parser = choice( str("<=>"), str("-->"), @@ -520,10 +462,10 @@ str("⇌"), str("→"), str("⇄"), - char("+") + char("+"), + math-text-parser ) -// CONDITION ::= "[" TEXT "]" #let condition-parser = label( map( seq(char("["), many(none-of("]")), char("]")), @@ -535,21 +477,19 @@ "reaction condition (e.g., [heat], [catalyst])" ) -// OPERATOR ::= CONDITION? OP_SYMBOL CONDITION? #let operator-parser = map( seq(ws, optional(condition-parser), op-symbol-parser, optional(condition-parser), ws), parts => { - let (_, cond1, symbol, cond2, _) = parts + let (_, cond1, op, cond2, _) = parts ( type: "operator", condition-before: cond1, - symbol: symbol, + op: op, condition-after: cond2 ) } ) -// term ::= COEFFICIENT? molecule #let term-parser = label( map( seq(optional(coefficient-parser), molecule-parser()), @@ -565,7 +505,6 @@ "reaction term" ) -// reaction ::= term (OPERATOR term)* #let reaction-parser = label( map( seq(term-parser, many(seq(operator-parser, term-parser))), diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 87231f1..2a94cd1 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,7 +1,7 @@ // Simple transformer for new parser structure with angles already calculated +#import "iupac-angle.typ": calculate_angles #import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left -// Create fragment element #let transform_fragment(node) = { let atoms = node.name ( @@ -16,7 +16,6 @@ ) } -// Create bond element based on symbol and angle #let transform_bond(bond) = { let symbol = bond.symbol let absolute = bond.at("absolute", default: none) @@ -43,10 
+42,10 @@ bond-fn(absolute: absolute, relative: relative) } -#let transform_branch(branch, transform) = { +#let transform_branch(branch, transform_molecule) = { let elements = () elements += transform_bond(branch.bond) - elements += transform(branch.body) + elements += transform_molecule(branch.body) return ( type: "branch", @@ -55,49 +54,66 @@ ) } -#let transform_cycle(cycle, transform) = { +#let transform_cycle(cycle, transform_molecule) = { return ( type: "cycle", faces: cycle.faces, - body: if cycle.body != none { transform(cycle.body) } else { none }, + body: if cycle.body != none { transform_molecule(cycle.body) } else { none }, args: (:), ) } -// Transform a single unit (node + branches) -#let transform_unit(unit, transform) = { +#let transform_unit(unit, transform_molecule) = { let elements = () - // Add node content if unit.node != none { if unit.node.type == "fragment" { elements.push(transform_fragment(unit.node)) } else if unit.node.type == "cycle" { - elements.push(transform_cycle(unit.node, transform)) + elements.push(transform_cycle(unit.node, transform_molecule)) } else if unit.node.type == "implicit" { } else { panic("Unknown node type: " + unit.node.type) } } - // Add branches - elements += unit.branches.map(branch => transform_branch(branch, transform)) + elements += unit.branches.map(branch => transform_branch(branch, transform_molecule)) return elements } -// Main transformation function -#let transform(molecule) = { - if molecule == none or molecule.type != "molecule" { - return () - } +#let transform_molecule(molecule) = { + if molecule == none { return () } + if type(molecule) == array { return molecule } + if molecule.type != "molecule" { return () } let elements = () - elements += transform_unit(molecule.first, transform) + elements += transform_unit(molecule.first, transform_molecule) for item in molecule.rest { elements += transform_bond(item.bond) - elements += transform_unit(item.unit, transform) + elements += transform_unit(item.unit, 
transform_molecule) } - return elements } + +#let transform_reaction(reaction) = { + reaction.terms.map(term => { + if term.type == "term" { + let molecule = term.molecule + let molecule_with_angles = calculate_angles(molecule) + + transform_molecule(molecule_with_angles) + } else if term.type == "operator" { + (( + type: "operator", + name: none, + op: eval("$" + term.op + "$"), + margin: 0.7em, + ),) + } else { + panic("Unknown term type: " + term.type) + } + }).join() +} + +#let transform = transform_reaction From 46314ea054a3c9495d10dd4da017f5b1fa9276f5 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 04:53:57 +0900 Subject: [PATCH 15/30] feat: Implement simple error handling --- src/elements/molecule/molecule.typ | 8 ++--- src/elements/molecule/parser.typ | 45 ++++++++++++++++++++++++--- src/elements/molecule/transformer.typ | 1 - src/utils/parser-combinator.typ | 20 ++++++++++-- 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/elements/molecule/molecule.typ b/src/elements/molecule/molecule.typ index 41b7029..28c3980 100644 --- a/src/elements/molecule/molecule.typ +++ b/src/elements/molecule/molecule.typ @@ -4,10 +4,10 @@ #let molecule(content, name: none, ..args) = { let parsed = alchemist-parser(content) if not parsed.success { - panic([ - Failed to parse #content reaction: #parsed.error - #repr(parsed) - ]) + // Display error inline + return text(fill: red)[ + Failed to parse "#content": #parsed.error + ] } let reaction = parsed.value diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index d42cea6..bdff541 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -169,7 +169,7 @@ seq(char("^"), some(digit)), parts => { let (_, digits) = parts - (type: "isotope", value: int(digits.join())) + int(digits.join()) } ), "isotope notation (e.g., ^14, ^235)" @@ -287,6 +287,7 @@ math-text-parser, ) +// Atoms to math content 
processor #let process-atom(parts) = { let type = parts.type @@ -365,7 +366,7 @@ // ==================== Rings ==================== #let ring-parser(mol-parser) = label( - lazy(() => map( + map( seq( char("@"), some(digit), optional(seq(char("("), mol-parser, char(")"))), @@ -387,7 +388,7 @@ options: opts ) } - )), + ), "ring notation (e.g., @6, @5(C-C-C-C-C))" ) @@ -527,6 +528,40 @@ // ==================== Parse Functions ==================== #let alchemist-parser(input) = { - let full = map(seq(reaction-parser, eof()), r => r.at(0)) - parse(full, input) + let reaction_result = parse(reaction-parser, input) + + if not reaction_result.success { + return reaction_result + } + + if reaction_result.rest != "" { + let rest = reaction_result.rest + let preview_len = calc.min(10, rest.len()) + let preview = rest.slice(0, preview_len) + + let first_char = rest.at(0) + let error_msg = if first_char >= "0" and first_char <= "9" { + "Unexpected number '" + preview + "' - numbers must be part of subscripts, isotopes, or ring sizes" + } else if first_char == "&" or first_char == "!" 
or first_char == "%" { + "Invalid character '" + first_char + "' - not a valid bond or atom symbol" + } else if first_char == "@" { + "Invalid ring notation starting with '" + preview + "' - expected @N where N is a number" + } else if first_char == "^" { + "Invalid isotope or charge notation starting with '" + preview + "'" + } else if first_char == "-" or first_char == "=" or first_char == "#" { + "Unexpected bond '" + first_char + "' - bonds must connect atoms" + } else { + "Unexpected content '" + preview + "' after valid molecule" + } + + return ( + success: false, + value: none, + error: error_msg + " (at position " + repr(input.len() - rest.len()) + ")", + rest: rest + ) + } + + // Success - all input was consumed + return reaction_result } diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 2a94cd1..3e2cf68 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,4 +1,3 @@ -// Simple transformer for new parser structure with angles already calculated #import "iupac-angle.typ": calculate_angles #import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left diff --git a/src/utils/parser-combinator.typ b/src/utils/parser-combinator.typ index 8ace06e..5d0e1e3 100644 --- a/src/utils/parser-combinator.typ +++ b/src/utils/parser-combinator.typ @@ -238,7 +238,7 @@ for i in range(n) { let r = (p.run)(current) - if not r.ok { return err("expected " + str(n) + " items, got " + str(i), current) } + if not r.ok { return err("expected " + repr(n) + " items, got " + repr(i), current) } results.push(r.value) current = r.state } @@ -275,7 +275,14 @@ #let label(p, lbl) = parser(lbl, s => { let r = (p.run)(s) if not r.ok { - err(lbl + " failed: " + r.error, s) + // Create a more descriptive error message + let context_str = if s.pos < s.len { + let preview = (s.peek)(s, n: calc.min(10, s.len - s.pos)) + " at '" + preview + "'" + } 
else { + " at end of input" + } + err("Expected " + lbl + context_str + " (got: " + r.error + ")", s) } else { r } @@ -322,7 +329,14 @@ ( success: r.ok, value: if r.ok { r.value } else { none }, - error: if not r.ok { r.error } else { none }, + error: if not r.ok { + let pos_info = if r.state.pos > 0 { + " (at position " + repr(r.state.pos) + ")" + } else { + "" + } + r.error + pos_info + } else { none }, rest: (r.state.remaining)(r.state), ) } From 98b8c40cb5f84dc3ee176773126a744dc99f77cb Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 20:57:33 +0900 Subject: [PATCH 16/30] feat: Add edge and common integration test cases --- tests/molecule-edge-cases/.gitignore | 4 + tests/molecule-edge-cases/test.typ | 221 ++++++++++++++++++++++++++ tests/molecule-integration/.gitignore | 4 + tests/molecule-integration/test.typ | 130 +++++++++++++++ 4 files changed, 359 insertions(+) create mode 100644 tests/molecule-edge-cases/.gitignore create mode 100644 tests/molecule-edge-cases/test.typ create mode 100644 tests/molecule-integration/.gitignore create mode 100644 tests/molecule-integration/test.typ diff --git a/tests/molecule-edge-cases/.gitignore b/tests/molecule-edge-cases/.gitignore new file mode 100644 index 0000000..40223be --- /dev/null +++ b/tests/molecule-edge-cases/.gitignore @@ -0,0 +1,4 @@ +# generated by tytanic, do not edit + +diff/** +out/** diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ new file mode 100644 index 0000000..bf611a4 --- /dev/null +++ b/tests/molecule-edge-cases/test.typ @@ -0,0 +1,221 @@ +#import "../../lib.typ": * +#import "../../src/elements/molecule/parser.typ": alchemist-parser +#import "../../src/elements/molecule/transformer.typ": transform +#import "../../src/elements/molecule/molecule.typ": molecule + +// Error handling and edge cases test += Molecule Edge Cases and Error Handling Tests + +#let test-parse(input, description) = { + let parsed = 
alchemist-parser(input) + if not parsed.success { + return [ + == #description + text(fill: red)[ + Failed to parse "#input": #parsed.error + ] + ] + } + + let reaction = parsed.value + let result = transform(reaction) + + [ + == #description + ✓ Input: #input + // #skeletize(result) + #linebreak() + Parsed successfully with #parsed.value.terms.len() nodes + // #repr(parsed.value) + #linebreak() + #repr(result) + // #linebreak() + ] +} + += Parser edge cases +// Empty input +#test-parse("", "Empty input") + +// Whitespace only +#test-parse(" ", "Whitespace only") + +// Single atom +#test-parse("C", "Single atom") +#test-parse("H", "Single hydrogen") +#test-parse("Cl", "Single chlorine") + +// Bond only (no atom) +#test-parse("-", "Bond only") +#test-parse("=", "Double bond only") +#test-parse("#", "Triple bond only") + +// Incomplete bond +#test-parse("CH3-", "Trailing bond") +#test-parse("-CH3", "Leading bond") +#test-parse("CH3--CH3", "Double dash") +#test-parse("CH3-A(-CH3)(-CH3)-CH3", "Multiple branches") + +// Invalid parenthesis +#test-parse("CH3(", "Unclosed parenthesis") +#test-parse("CH3)", "Extra closing parenthesis") +#test-parse("CH3(-OH", "Unclosed branch") +#test-parse("CH3-OH)", "Extra closing in chain") + +// Deeply nested structure +#test-parse("C(-C(-C(-C(-C(-OH)))))", "Deeply nested (5 levels)") +#test-parse("C(-(-(-(-OH))))", "Multiple nested empty branches") + +// Complex branching patterns +#test-parse("C()()()()", "Empty branches") +#test-parse("C(-CH3)()(-OH)", "Mixed empty and filled branches") +#test-parse("C(-)(-)(=)", "Branches with only bonds") + +// Long chain +#let long-chain = "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-OH" +#test-parse(long-chain, "Very long chain (50 CH2 units)") + += Ring structure edge cases + +// Basic rings +#test-parse("@6", "Simple 6-membered ring") 
+#test-parse("@5", "5-membered ring") +#test-parse("@4", "4-membered ring") +#test-parse("@3", "3-membered ring") +#test-parse("@7", "7-membered ring") +#test-parse("@8", "8-membered ring") + +// Ring size boundary values +#test-parse("@2", "2-membered ring (chemically impossible)") +#test-parse("@1", "1-membered ring (invalid)") +#test-parse("@10", "10-membered ring") +#test-parse("@15", "15-membered ring (macrocycle)") +#test-parse("@20", "20-membered ring (large macrocycle)") + +// Ring bond patterns +#test-parse("@6(------)", "Ring with explicit single bonds") +#test-parse("@6(=-=-=-)", "Benzene with alternating bonds") +#test-parse("@6(======)", "Ring with all double bonds (impossible)") +#test-parse("@6(#-----)", "Ring with triple bond (strained)") + +// Ring substituents +#test-parse("@6(------CH3)", "Ring with one substituent") +#test-parse("@6(------CH3)(-OH)", "Ring with adjacent substituents") +#test-parse("@6(------CH3)-OH", "Ring with separated substituents") +#test-parse("---(-OH)", "Ring with 1,3-substituents") +#test-parse("@6(------CH3)--(-OH)", "Ring with 1,3-substituents") +#test-parse("@6(------CH3)---(-OH)", "Ring with 1,4-substituents") + +// Ring with complex substituents +#test-parse("@6(--CH2-CH3---)", "Ring with ethyl group") +#test-parse("@6(------CH(-CH3)2)", "Ring with isopropyl group") +#test-parse("@6(-----(-C(=O)-OH)-)", "Ring with carboxyl group") +#test-parse("@6(----CH2-CH2-CH3)(-OH)", "Ring with propyl and hydroxyl") + +// Ring with nested branches +#test-parse("@6(-CH2(-OH))", "Ring with branched substituent") +#test-parse("@6(-CH(-CH3)(-OH))", "Ring with multi-branched substituent") + +// Ring connected to chain +#test-parse("CH3-@6", "Methyl attached to ring") +#test-parse("@6-CH3", "Ring attached to methyl") +#test-parse("CH3-@6-CH3", "Ring in middle of chain") +#test-parse("CH3-CH2-@6-CH2-CH3", "Ring embedded in chain") + +// Multiple rings +#test-parse("@6-@6", "Two connected rings (biphenyl)") +#test-parse("@6-A", "Two 
connected rings (biphenyl)") +#test-parse("@6-CH2-@6", "Rings connected by methylene") +#test-parse("@6=@6", "Rings connected by double bond") +#test-parse("@6-@5", "Different sized rings connected") + +// Invalid ring notation (expected parse error) +// #test-parse("@", "At sign without size") +// #test-parse("@0", "Zero-sized ring") +// #test-parse("@-1", "Negative ring size") +// #test-parse("@a", "Non-numeric ring size") +// #test-parse("@6.5", "Decimal ring size") + +// Ring with empty parentheses +#test-parse("@6()", "Ring with empty parentheses") +#test-parse("@6(())", "Ring with nested empty parentheses") +#test-parse("@6(CH3)", "Ring with atom in parentheses (invalid)") +#test-parse("@6(-)", "Ring with only bond") +#test-parse("@6((-))", "Ring with parenthesized bond") +#test-parse("@6(-=-=-(-O-CH3)=)", "Ring with carboxyl group") + +// Label special cases +#test-parse("CH3:", "Label without name") +#test-parse("CH3::", "Double colon") +#test-parse("CH3:label1:label2", "Multiple labels") +#test-parse(":labelonly", "Label without atom") + +// Consecutive bonds +#test-parse("CH3=-CH3", "Mixed bond types") +// Multiple different bonds are grammar errors so omitted +#test-parse("CH3<>CH3", "Consecutive wedge bonds") + +// Number processing +#test-parse("C2H6", "Molecular formula style") +#test-parse("CH23", "Large subscript") +#test-parse("C123H456", "Very large numbers") + += Conversion edge cases + + +// Circular reference possibility +// Ring structure nested test is omitted + +// Very many branches +#let many-branches = "C(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)" +#test-parse(many-branches, "10 branches on single carbon") + +// Interchangeable bond patterns +// Complex bond patterns are omitted + +// All bond types +// Complex bond types are omitted + += Actually chemically invalid structures + +== Chemically impossible but syntactically valid + +// Pentavalent carbon +#test-parse("C(-H)(-H)(-H)(-H)(-H)", "Pentavalent carbon") + +// 
2-membered ring +// 2-membered ring test is omitted + +// Quadruple nested +#test-parse("CH3(-CH2(-CH(-CH2(-OH))))", "Quadruple nested") + += Boundary value test + +== Minimum case +#test-parse("H", "Single hydrogen") +#test-parse("C", "Single carbon") + +== Maximum case +// Very long atom name +#test-parse("CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2OH", "Long atom string") + +// Very long label +#test-parse("CH3:verylonglabelnamethatshouldstillwork", "Long label name") + += Unicode and special characters + +// #test-parse("CH₃-CH₂-OH", "Unicode subscripts") +// #test-parse("CH³⁺", "Unicode superscript charge") +// #test-parse("CH3–CH2–OH", "En dash bonds") +// #test-parse("CH3−CH2−OH", "Minus sign bonds") + += Performance test input + +// Huge linear structure +// 100 CH2 is too long so omitted + +// Huge branching structure +// 20 branches are omitted + +// Deeply nested +// 10 levels of nesting are omitted diff --git a/tests/molecule-integration/.gitignore b/tests/molecule-integration/.gitignore new file mode 100644 index 0000000..40223be --- /dev/null +++ b/tests/molecule-integration/.gitignore @@ -0,0 +1,4 @@ +# generated by tytanic, do not edit + +diff/** +out/** diff --git a/tests/molecule-integration/test.typ b/tests/molecule-integration/test.typ new file mode 100644 index 0000000..ebb26a7 --- /dev/null +++ b/tests/molecule-integration/test.typ @@ -0,0 +1,130 @@ +#import "../../lib.typ": * +#import "../../src/elements/molecule/molecule.typ": molecule + += Molecule Integration Tests + +== Organic Compounds + +=== Ethanol +#skeletize(molecule("CH3-CH2-OH")) + +=== Isopropanol +#skeletize(molecule("CH3-CH(-OH)-CH3")) + +=== Acetone +#skeletize(molecule("CH3-C(=O)-CH3")) + +=== Acetic Acid +#skeletize(molecule("CH3-C(=O)-OH")) + +=== Benzene Ring Structure +#skeletize(molecule("@6(-=-=-=)")) + +== Amino Acids + +=== Glycine +#skeletize(molecule("NH2-CH2-C(=O)-OH")) + +=== Alanine +#skeletize(molecule("NH2-CH(-CH3)-C(=O)-OH")) + +=== Serine 
+#skeletize(molecule("NH2-CH(-CH2(-OH))-C(=O)-OH")) + +== Sugars + +=== Linear Glucose +#skeletize(molecule("CHO-CH(-OH)-CH(-OH)-CH(-OH)-CH(-OH)-CH2OH")) + +=== Linear Fructose +#skeletize(molecule("CH2OH-C(=O)-CH(-OH)-CH(-OH)-CH(-OH)-CH2OH")) + +== Fatty Acids + +=== Butyric Acid +#skeletize(molecule("CH3-CH2-CH2-C(=O)-OH")) + +=== Palmitic Acid +#skeletize(molecule("CH3-(CH2)14-C(=O)-OH")) + +== Complex Branching Structures + +=== tert-Butyl Alcohol +#skeletize(molecule("C(-CH3)(-CH3)(-CH3)-OH")) + +=== Neopentane +#skeletize(molecule("C(-CH3)(-CH3)(-CH3)-CH3")) + +=== Complex Branching Alcohol +#skeletize(molecule("CH3-C(-CH3)(-CH2(-OH))-CH2-CH3")) + +== Unsaturated Compounds + +=== Ethylene +#skeletize(molecule("CH2=CH2")) + +=== Acetylene +#skeletize(molecule("HC#CH")) + +=== Butadiene +#skeletize(molecule("CH2=CH-CH=CH2")) + +=== Acrylic Acid +#skeletize(molecule("CH2=CH-C(=O)-OH")) + +== Cyclic Compounds + +=== Cyclohexane +#skeletize(molecule("@6(------)")) + +=== Cyclohexanol +#skeletize(molecule("@6(-----(-OH)-)")) + +=== Methylcyclohexane +#skeletize(molecule("@6(------)-CH3")) + +=== 1,4-Dimethylcyclohexane +#skeletize(molecule("@6((-CH3)---(-CH3)---)")) + +== Labeled Structures + +=== Reaction Site Marking +#skeletize(molecule("CH3:start-CH2-CH2-OH:end")) + +=== Substituent Identification +#skeletize(molecule("CH3-CH:carbon(-OH:hydroxyl)-CH3")) + +== Stereochemistry + +=== Wedge Bond (Stereochemistry) +#skeletize(molecule("CH3CH3")) + +=== Dashed Wedge Bond +#skeletize(molecule("CH3<|CH(-OH)|>CH3")) + +== Polymers + +=== Polyethylene Unit +// #skeletize(molecule("(-CH2-CH2-)n")) + +=== Polystyrene Unit +// #skeletize(molecule("(-CH2-CH(-@6)-)n")) + +== Complex Natural Compounds (Simplified) + +=== Caffeine Skeleton (Simplified) +#skeletize(molecule("@6(=O)(-N(-CH3))-N-@5(-N(-CH3))=N")) + +=== Cholesterol Skeleton (Simplified) +// #skeletize(molecule("@6-@6-@6-@5(-CH3)(-CH(-CH3)-CH2-CH2-CH2-CH(-CH3)2)")) + +== Pharmaceutical Skeleton (Simplified) + +=== 
Aspirin +#skeletize(molecule("@6(-=-=(-O-C-CH3)-=)")) + +=== Paracetamol +#skeletize(molecule("@6(-OH)---(-NH-C(=O)-CH3)")) + +=== Ibuprofen Skeleton +// #skeletize(molecule("@6(-CH(-CH3)-CH2-CH(-CH3)2)---(-CH(-CH3)-C(=O)-OH)")) From 33c6c00d39a464902af9e34601cb1c8960cf7051 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 12 Sep 2025 21:20:39 +0900 Subject: [PATCH 17/30] feat: Implement validator parser combinator --- src/elements/molecule/parser.typ | 17 +++++++++++++++-- src/utils/parser-combinator.typ | 13 +++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index bdff541..2aaa0fa 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -365,16 +365,29 @@ // ==================== Rings ==================== +#let ring-size-parser = validate( + some(digit), + digits => { + if digits.len() == 0 { + return (false, "Ring notation (e.g., @6, @5(C-C-C-C-C)) must have at least one digit") + } + let num = int(digits.join()) + (num >= 3, "Ring size must be at least 3") + }, +) + #let ring-parser(mol-parser) = label( map( seq( - char("@"), some(digit), + char("@"), ring-size-parser, optional(seq(char("("), mol-parser, char(")"))), optional(label-parser), optional(options-parser) ), parts => { let (_, digits, mol, lbl, opts) = parts + let faces = int(digits.join()) + if type(mol) == array { let (_, mol, _) = mol } else { @@ -382,7 +395,7 @@ } ( type: "cycle", - faces: int(digits.join()), + faces: faces, body: mol, label: lbl, options: opts diff --git a/src/utils/parser-combinator.typ b/src/utils/parser-combinator.typ index 5d0e1e3..51a0df7 100644 --- a/src/utils/parser-combinator.typ +++ b/src/utils/parser-combinator.typ @@ -218,6 +218,19 @@ ok(results, current) }) +// Validate parsed value +#let validate(p, validator) = parser("validate", s => { + let result = (p.run)(s) + if not result.ok { return result } 
+ + let (valid, error-msg) = validator(result.value) + if not valid { + return err(error-msg, s) + } + + ok(result.value, result.state) +}) + // Separated by (at least one) #let sep-by1(p, separator) = parser("sep-by1", s => { let first = (p.run)(s) From 1cbcf5867681942ccbc4bffbcd9bc2834026c47f Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:30:59 +0900 Subject: [PATCH 18/30] feat: Implement connecting points with labeling by merging label --- src/elements/molecule/iupac-angle.typ | 15 +- src/elements/molecule/merger.typ | 261 ++++++++++++++++++++++++++ src/elements/molecule/parser.typ | 191 +++++++++++-------- src/elements/molecule/transformer.typ | 106 +++++++++-- tests/molecule-edge-cases/test.typ | 2 + 5 files changed, 471 insertions(+), 104 deletions(-) create mode 100644 src/elements/molecule/merger.typ diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index ee59f17..cfa8f97 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -23,8 +23,9 @@ } #let calc_unit_angles(unit, prev_bond, next_bond, current_angle) = { + let branches = unit.at("branches", default: ()) // Count all bonds - let n = unit.branches.len() + if prev_bond != none { 1 } else { 0 } + if next_bond != none { 1 } else { 0 } + let n = branches.len() + if prev_bond != none { 1 } else { 0 } + if next_bond != none { 1 } else { 0 } let bonds = () if prev_bond != none { bonds.push(prev_bond) } @@ -43,7 +44,7 @@ if prev_bond != none { angle_idx += 1 } // Skip incoming angle if next_bond != none { angle_idx += 1 } // Skip outgoing angle - for _ in unit.branches { + for _ in branches { branch_angles.push(current_angle + angles.at(calc.rem(angle_idx, angles.len()))) angle_idx += 1 } @@ -70,8 +71,9 @@ // Create new branches with angles let new_branches = () - if unit.branches != none { - for (b_idx, branch) in unit.branches.enumerate() { + let branches = 
unit.at("branches", default: ()) + if branches != none and branches.len() > 0 { + for (b_idx, branch) in branches.enumerate() { if b_idx < branch_angles.len() { // Create new bond with angle let new_bond = if branch.bond != none { @@ -121,9 +123,10 @@ // Create new unit with branch angles let new_unit = unit - if unit != none and unit.branches != none { + let branches = unit.at("branches", default: ()) + if unit != none and branches != none and branches.len() > 0 { let new_branches = () - for (b_idx, branch) in unit.branches.enumerate() { + for (b_idx, branch) in branches.enumerate() { if b_idx < branch_angles.len() { // Create new bond with angle let new_bond = if branch.bond != none { diff --git a/src/elements/molecule/merger.typ b/src/elements/molecule/merger.typ new file mode 100644 index 0000000..d475f11 --- /dev/null +++ b/src/elements/molecule/merger.typ @@ -0,0 +1,261 @@ +// Merger logic for label continuation system + +#import "parser.typ": molecule-parser, parse + +// Parse multiple molecular parts separated by whitespace +#let parse-molecular-parts(input) = { + // Split by whitespace while preserving structure + let parts = () + let current = "" + let depth = 0 + + for char in input { + if char == "(" { depth += 1 } + else if char == ")" { depth -= 1 } + + if depth == 0 and (char == " " or char == "\t" or char == "\n") { + if current != "" { + parts.push(current) + current = "" + } + } else { + current += char + } + } + + if current != "" { + parts.push(current) + } + + // Parse each part + let molecules = () + for part in parts { + let result = parse(molecule-parser(), part) + if result.success { + molecules.push(result.value) + } + } + + return molecules +} + +// Separate main molecule from continuations +#let separate-molecules(molecules) = { + let main = none + let continuations = () + + for mol in molecules { + if mol.at("is_continuation", default: false) { + continuations.push(mol) + } else if main == none { + main = mol + } else { + // Multiple 
main molecules - merge them sequentially + // This could be extended to handle multiple main molecules + panic("Multiple main molecules found - only one non-continuation molecule allowed") + } + } + + if main == none { + panic("No main molecule found - at least one non-continuation molecule required") + } + + return (main: main, continuations: continuations) +} + +// Build registry of labeled fragments in the molecule +#let build-label-registry(mol) = { + let visit(mol, path, registry) = { + if mol == none { return registry } + + let result = registry + + // Process first unit + if mol.at("first", default: none) != none { + let unit = mol.first + let unit_path = path + ((type: "first"),) + + // Check if node has a label + if unit.node != none and unit.node.type == "fragment" { + if unit.node.at("name", default: none) != none { + result.insert(unit.node.name, (unit: unit, path: unit_path)) + } + } + + // Process branches of first unit + if unit.at("branches", default: ()) != none { + for (idx, branch) in unit.branches.enumerate() { + if branch.at("body", default: none) != none { + result = visit(branch.body, unit_path + ((type: "branch", idx: idx),), result) + } + } + } + } + + // Process rest units + if mol.at("rest", default: ()) != none { + for (idx, item) in mol.rest.enumerate() { + if item.at("unit", default: none) != none { + let unit = item.unit + let unit_path = path + ((type: "rest", idx: idx),) + + // Check if node has a label + if unit.node != none and unit.node.type == "fragment" { + if unit.node.at("name", default: none) != none { + result.insert(unit.node.name, (unit: unit, path: unit_path)) + } + } + + // Process branches + if unit.at("branches", default: ()) != none { + for (b_idx, branch) in unit.branches.enumerate() { + if branch.at("body", default: none) != none { + result = visit(branch.body, unit_path + ((type: "branch", idx: b_idx),), result) + } + } + } + } + } + } + + return result + } + + return visit(mol, (), (:)) +} + +// Create a molecule 
structure from a continuation +#let create-molecule-from-continuation(cont) = { + // Remove the label reference from the first unit + let new_first = if cont.rest.len() > 0 { + cont.rest.at(0).unit + } else { + (type: "unit", node: (type: "implicit"), branches: ()) + } + + let new_rest = if cont.rest.len() > 1 { + cont.rest.slice(1) + } else { + () + } + + ( + type: "molecule", + first: new_first, + rest: new_rest + ) +} + +// Add branch to a unit at the given path in the molecule +#let add-branch-at-path(mol, path, branch) = { + if path.len() == 0 { return mol } + + let step = path.at(0) + let remaining_path = path.slice(1) + + if step.type == "first" { + if remaining_path.len() == 0 { + // We're at the target unit + if mol.first.at("branches", default: none) == none { + mol.first.branches = () + } + mol.first.branches.push(branch) + } else { + // Continue deeper + mol.first = add-branch-to-unit(mol.first, remaining_path, branch) + } + } else if step.type == "rest" { + let idx = step.idx + if remaining_path.len() == 0 { + // We're at the target unit + if mol.rest.at(idx).unit.at("branches", default: none) == none { + mol.rest.at(idx).unit.branches = () + } + mol.rest.at(idx).unit.branches.push(branch) + } else { + // Continue deeper + mol.rest.at(idx).unit = add-branch-to-unit(mol.rest.at(idx).unit, remaining_path, branch) + } + } + + return mol +} + +// Helper to add branch to a unit with nested branches +#let add-branch-to-unit(unit, path, branch) = { + if path.len() == 0 { + if unit.at("branches", default: none) == none { + unit.branches = () + } + unit.branches.push(branch) + return unit + } + + let step = path.at(0) + if step.type == "branch" { + let idx = step.idx + let remaining_path = path.slice(1) + unit.branches.at(idx).body = add-branch-at-path(unit.branches.at(idx).body, remaining_path, branch) + } + + return unit +} + +// Merge continuations into the main molecule +#let merge-continuations(main, continuations) = { + let registry = 
build-label-registry(main) + let result = main + + for cont in continuations { + let target_label = cont.continuation_label + + if target_label not in registry { + panic("Label '" + target_label + "' not found in main molecule") + } + + let target_info = registry.at(target_label) + let target_path = target_info.path + + // Get the first bond from the continuation + let cont_bond = if cont.rest.len() > 0 { + cont.rest.at(0).bond + } else { + (type: "bond", symbol: "-") // Default to single bond + } + + // Create branch from continuation + let branch_body = create-molecule-from-continuation(cont) + + let branch = ( + type: "branch", + bond: cont_bond, + body: branch_body + ) + + // Add branch at the target path + result = add-branch-at-path(result, target_path, branch) + } + + return result +} + +// Main entry point for parsing with continuations +#let parse-with-continuations(input) = { + // Parse all molecular parts + let molecules = parse-molecular-parts(input) + + if molecules.len() == 0 { + panic("No valid molecules found in input") + } + + if molecules.len() == 1 { + // Single molecule, no continuations needed + return molecules.at(0) + } + + // Separate main and continuations + let separated = separate-molecules(molecules) + + // Merge continuations into main structure + return merge-continuations(separated.main, separated.continuations) +} \ No newline at end of file diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 2aaa0fa..475a15d 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -54,6 +54,7 @@ #let digit = satisfy( c => c >= "0" and c <= "9", name: "digit" ) +#let integer = map(some(digit), ds => int(ds.join())) #let letter = satisfy( c => (c >= "a" and c <= "z") or (c >= "A" and c <= "Z"), name: "letter" ) @@ -104,40 +105,41 @@ seq(char(":"), identifier), parts => { let (_, id) = parts - id.value + id } ) -#let bond-label-parser = map( - seq(str("::"), identifier), +#let 
label-ref-parser = map( + seq(char(":"), identifier), parts => { let (_, id) = parts - id.value + (type: "label-ref", label: id) } ) -#let value-parser = choice( - map(some(digit), ds => int(ds.join())), - identifier +#let bond-label-parser = map( + seq(str("::"), identifier), + parts => { + let (_, id) = parts + id + } ) +// TODO: Fix this parser to support multiple key-value pairs #let key-value-pair-parser = label( map( - seq(identifier, token(":"), value-parser), - parts => { - let (key, _, value) = parts - (key: key, value: value) - } + seq(identifier, token(":"), some(none-of(")"))), + parts => parts.join() ), "key-value pair (e.g., color: red, angle: 45)" ) #let options-parser = label( map( - seq(char("("), sep-by(key-value-pair-parser, token(",")), char(")")), + seq(char("("), key-value-pair-parser, char(")")), parts => { let (_, pairs, _) = parts - (type: "options", pairs: pairs) + (type: "options", pairs: eval("(" + pairs + ")")) } ), "options in parentheses" @@ -157,19 +159,16 @@ ) #let subscript-parser = label( - map( - some(digit), - digits => int(digits.join()) - ), + integer, "subscript number (e.g., CH4, O2)" ) #let isotope-parser = label( map( - seq(char("^"), some(digit)), + seq(char("^"), integer), parts => { - let (_, digits) = parts - int(digits.join()) + let (_, num) = parts + num } ), "isotope notation (e.g., ^14, ^235)" @@ -225,40 +224,36 @@ "math text notation (e.g., $\\Delta$, $\\mu$)" ) -#let parenthetical-parser(atoms-parser) = { - label( - map( - seq( - char("("), - atoms-parser, - char(")"), - optional(subscript-parser) - ), - parts => { - let (_, atoms, _, subscript) = parts - (type: "parenthetical", atoms: atoms, subscript: subscript) - } +#let parenthetical-parser(atoms-parser) = label( + map( + seq( + char("("), + atoms-parser, + char(")"), + optional(subscript-parser) ), - "parenthetical group (e.g., (OH)2, (NH4)2)" - ) -} + parts => { + let (_, atoms, _, subscript) = parts + (type: "parenthetical", atoms: atoms, subscript: 
subscript) + } + ), + "parenthetical group (e.g., (OH)2, (NH4)2)" +) -#let complex-parser(atoms-parser) = { - label( - map( - seq( - char("["), - atoms-parser, - char("]") - ), - parts => { - let (_, atoms, _) = parts - (type: "complex", atoms: atoms) - } +#let complex-parser(atoms-parser) = label( + map( + seq( + char("["), + atoms-parser, + char("]") ), - "complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+)" - ) -} + parts => { + let (_, atoms, _) = parts + (type: "complex", atoms: atoms) + } + ), + "complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+)" +) #let atoms-part-parser(atoms-parser) = choice( element-group-parser, @@ -292,9 +287,9 @@ let type = parts.type if type == "atoms" { - let base = parts.parts.map(process-atom).join() + let base = parts.parts.map(process-atom) if parts.charge != none { - math.attach(base, tr: eval("$" + parts.charge + "$")) + (math.attach(base.join(), tr: eval("$" + parts.charge + "$")),) } else { base } @@ -322,9 +317,9 @@ let (content, label, options) = parts ( type: "fragment", - name: process-atom(content), - label: label, - options: options + atoms: process-atom(content), + name: label, + options: if options != none { options } else { (:) } ) } ), @@ -355,8 +350,8 @@ ( type: "bond", symbol: symbol, - label: label, - options: options + name: label, + options: if options != none { options } else { (:) } ) } ), @@ -365,15 +360,20 @@ // ==================== Rings ==================== -#let ring-size-parser = validate( - some(digit), - digits => { - if digits.len() == 0 { - return (false, "Ring notation (e.g., @6, @5(C-C-C-C-C)) must have at least one digit") - } - let num = int(digits.join()) - (num >= 3, "Ring size must be at least 3") - }, +#let ring-size-parser = map( + validate( + some(digit), + digits => { + if digits.len() == 0 { + return (false, "Ring notation (e.g., @6, @5(C-C-C-C-C)) must have at least one digit") + } + let num = int(digits.join()) + (num >= 3, "Ring size must be at least 3") + }, + ), + parts => { + 
int(parts.join()) + } ) #let ring-parser(mol-parser) = label( @@ -385,8 +385,7 @@ optional(options-parser) ), parts => { - let (_, digits, mol, lbl, opts) = parts - let faces = int(digits.join()) + let (_, faces, mol, lbl, opts) = parts if type(mol) == array { let (_, mol, _) = mol @@ -410,14 +409,14 @@ #let node-parser(mol-parser) = choice( fragment-parser, ring-parser(mol-parser), - label-parser + label-ref-parser ) #let branch-parser(mol-parser) = map( seq(char("("), bond-parser, mol-parser, char(")")), parts => { - let (_, bond, molecule, _) = parts - (type: "branch", bond: bond, body: molecule) + let (_, bond, molecules, _) = parts + (type: "branch", bond: bond, body: molecules) } ) @@ -425,11 +424,22 @@ seq(optional(node-parser(mol-parser)), many(branch-parser(mol-parser))), parts => { let (node, branches) = parts - ( - type: "unit", - node: if node == none { (type: "implicit") } else { node }, - branches: branches - ) + + // Handle label reference as a special unit type + if node != none and node.type == "label-ref" { + ( + type: "unit", + node: node, + branches: branches, + is_continuation_start: true + ) + } else { + ( + type: "unit", + node: if node == none { (type: "implicit") } else { node }, + branches: branches + ) + } } ) @@ -444,8 +454,19 @@ ), nodes => { let (first, rest) = nodes + + // Check if molecule starts with a label reference + let is_continuation = first.at("is_continuation_start", default: false) + let continuation_label = if is_continuation and first.node.type == "label-ref" { + first.node.label + } else { + none + } + ( type: "molecule", + is_continuation: is_continuation, + continuation_label: continuation_label, first: first, rest: rest.map(unit => { let (bond, unit) = unit @@ -462,8 +483,8 @@ #let coefficient-parser = label( map( - some(digit), - digits => (type: "coefficient", value: int(digits.join())) + integer, + num => (type: "coefficient", value: num) ), "stoichiometric coefficient" ) @@ -541,6 +562,14 @@ // 
==================== Parse Functions ==================== #let alchemist-parser(input) = { + if input == "" { + return ( + success: true, + value: (type: "reaction", terms: ()), + rest: input + ) + } + let reaction_result = parse(reaction-parser, input) if not reaction_result.success { @@ -557,8 +586,6 @@ "Unexpected number '" + preview + "' - numbers must be part of subscripts, isotopes, or ring sizes" } else if first_char == "&" or first_char == "!" or first_char == "%" { "Invalid character '" + first_char + "' - not a valid bond or atom symbol" - } else if first_char == "@" { - "Invalid ring notation starting with '" + preview + "' - expected @N where N is a number" } else if first_char == "^" { "Invalid isotope or charge notation starting with '" + preview + "'" } else if first_char == "-" or first_char == "=" or first_char == "#" { diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 3e2cf68..39ff1c5 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,24 +1,28 @@ #import "iupac-angle.typ": calculate_angles -#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left +#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left, cram-hollow-right, cram-hollow-left #let transform_fragment(node) = { - let atoms = node.name + let atoms = node.atoms ( type: "fragment", - atoms: if type(atoms) == array { atoms } else { (atoms,) }, - name: none, - links: (:), - lewis: (), - vertical: false, - count: if type(atoms) == array { atoms.len() } else { 1 }, - colors: none, + atoms: atoms, + name: node.at("name", default: none), + links: node.at("links", default: (:)), + lewis: node.options.at("lewis", default: ()), + vertical: node.options.at("vertical", default: false), + count: atoms.len(), + colors: node.options.at("colors", default: none), + label: 
node.at("name", default: none), + ..node.options, ) } #let transform_bond(bond) = { let symbol = bond.symbol + let name = bond.at("name", default: none) let absolute = bond.at("absolute", default: none) let relative = bond.at("relative", default: none) + let options = bond.options let bond-fn = if symbol == "-" { single @@ -34,11 +38,15 @@ cram-dashed-right } else if symbol == "<:" { cram-dashed-left + } else if symbol == "|>" { + cram-hollow-right + } else if symbol == "<|" { + cram-hollow-left } else { single } - bond-fn(absolute: absolute, relative: relative) + bond-fn(absolute: absolute, relative: relative, name: name, ..options) } #let transform_branch(branch, transform_molecule) = { @@ -62,17 +70,43 @@ ) } +#let transform_label_reference(label) = { + ( + type: "label-reference", + label: label.label, + links: (:), + ) +} + #let transform_unit(unit, transform_molecule) = { let elements = () if unit.node != none { - if unit.node.type == "fragment" { + // Debug: log what we're getting + if type(unit.node) == str { + // This shouldn't happen, but if node is a raw string, treat it as a label reference + elements.push(transform_label_reference((type: "label", label: unit.node))) + elements += unit.at("branches", default: ()).map(branch => transform_branch(branch, transform_molecule)) + return elements + } + + // Check if node has a type field (it should always have one from the parser) + let node_type = if type(unit.node) == dictionary { + unit.node.at("type", default: "unknown") + } else { + "unknown" + } + + if node_type == "fragment" { elements.push(transform_fragment(unit.node)) - } else if unit.node.type == "cycle" { + } else if node_type == "cycle" { elements.push(transform_cycle(unit.node, transform_molecule)) - } else if unit.node.type == "implicit" { + } else if node_type == "label-ref" { + elements.push(transform_label_reference(unit.node)) + } else if node_type == "implicit" { + // Implicit node, no action needed } else { - panic("Unknown node type: " + 
unit.node.type) + panic("Unknown node type: " + node_type + " for node: " + repr(unit.node)) } } @@ -95,13 +129,53 @@ return elements } +// Resolve label references after transformation +#let resolve_label_references(elements) = { + // First pass: collect all labeled atoms and their positions + let label_positions = (:) + let position = 0 + + for (i, element) in elements.enumerate() { + if element.type == "fragment" and element.at("label", default: none) != none { + label_positions.insert(element.label, i) + } + } + + // Second pass: resolve label references + let resolved = elements + for (i, element) in elements.enumerate() { + if element.type == "label-reference" { + let label = element.label + if label in label_positions { + let target_pos = label_positions.at(label) + resolved.at(i) = ( + type: "link", + from: i, + to: target_pos, + bond: single(), // Default to single bond, could be customized + ) + } else { + // Label not found, keep as unresolved reference or error + resolved.at(i) = ( + type: "error", + message: "Unresolved label reference: " + label, + ) + } + } + } + + return resolved +} + #let transform_reaction(reaction) = { reaction.terms.map(term => { if term.type == "term" { let molecule = term.molecule let molecule_with_angles = calculate_angles(molecule) - - transform_molecule(molecule_with_angles) + + let transformed = transform_molecule(molecule_with_angles) + // Resolve any label references in the transformed molecule + resolve_label_references(transformed) } else if term.type == "operator" { (( type: "operator", diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ index bf611a4..50ae0a2 100644 --- a/tests/molecule-edge-cases/test.typ +++ b/tests/molecule-edge-cases/test.typ @@ -71,6 +71,8 @@ #test-parse("C(-CH3)()(-OH)", "Mixed empty and filled branches") #test-parse("C(-)(-)(=)", "Branches with only bonds") +// #test-parse("$CH_3$-CH2-OH", "Typst math notation") + // Long chain #let long-chain = 
"CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH2-OH" #test-parse(long-chain, "Very long chain (50 CH2 units)") From 2b316040fca87f6b2c4443cf12dc4dfc947e0cfc Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:39:41 +0900 Subject: [PATCH 19/30] feat: Optimize calling depth in parser --- src/elements/molecule/parser.typ | 465 +++++++++++++------------------ src/utils/parser-combinator.typ | 48 ++-- 2 files changed, 223 insertions(+), 290 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 475a15d..bd40859 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -69,7 +69,7 @@ name: "alphanum" ) #let identifier = { - map(seq(choice(letter, char("_")), many(choice(alphanum, char("_")))), r => { + seq(choice(letter, char("_")), many(choice(alphanum, char("_"))), map: r => { let (first, rest) = r first + rest.join() }) @@ -78,12 +78,12 @@ #let ws = many(whitespace) #let space = one-of(" \t") #let newline = choice(str("\r\n"), char("\n")) -#let lexeme(p) = map(seq(p, ws), r => r.at(0)) +#let lexeme(p) = seq(p, ws, map: r => r.at(0)) #let token(s) = lexeme(str(s)) // String with escapes #let string-lit(quote: "\"") = { - let escape = map(seq(char("\\"), any()), r => { + let escape = seq(char("\\"), any(), map: r => { let (_, c) = r if c == "n" { "\n" } else if c == "t" { "\t" } @@ -96,98 +96,82 @@ let normal = none-of(quote + "\\") let char-parser = choice(escape, normal) - map(between(char(quote), char(quote), many(char-parser)), chars => chars.join()) + between(char(quote), char(quote), many(char-parser), map: chars => chars.join()) } // ==================== Labels and Options ==================== -#let label-parser = map( - seq(char(":"), identifier), - parts => { +#let label-parser = 
seq( + char(":"), identifier, + map: parts => { let (_, id) = parts id } ) -#let label-ref-parser = map( - seq(char(":"), identifier), - parts => { +#let label-ref-parser = seq( + char(":"), identifier, + map: parts => { let (_, id) = parts (type: "label-ref", label: id) } ) -#let bond-label-parser = map( - seq(str("::"), identifier), - parts => { +#let bond-label-parser = seq( + str("::"), identifier, + map: parts => { let (_, id) = parts id } ) // TODO: Fix this parser to support multiple key-value pairs -#let key-value-pair-parser = label( - map( - seq(identifier, token(":"), some(none-of(")"))), - parts => parts.join() - ), - "key-value pair (e.g., color: red, angle: 45)" +// key-value pair (e.g., color: red, angle: 45) +#let key-value-pair-parser = seq( + identifier, token(":"), some(none-of(")")), + map: parts => parts.join() ) -#let options-parser = label( - map( - seq(char("("), key-value-pair-parser, char(")")), - parts => { - let (_, pairs, _) = parts - (type: "options", pairs: eval("(" + pairs + ")")) - } - ), - "options in parentheses" +#let options-parser = seq( + char("("), key-value-pair-parser, char(")"), + map: parts => { + let (_, pairs, _) = parts + (type: "options", pairs: eval("(" + pairs + ")")) + } ) // ==================== Fragments ==================== -#let element-parser = label( - map( - seq(uppercase, optional(lowercase)), - parts => { - let (upper, lower) = parts - if lower != none { upper + lower } else { upper } - } - ), - "element symbol (e.g., H, Ca, Fe)" -) - -#let subscript-parser = label( - integer, - "subscript number (e.g., CH4, O2)" +// element symbol (e.g., H, Ca, Fe) +#let element-parser = seq( + uppercase, optional(lowercase), + map: parts => { + let (upper, lower) = parts + if lower != none { upper + lower } else { upper } + } ) -#let isotope-parser = label( - map( - seq(char("^"), integer), - parts => { - let (_, num) = parts - num - } - ), - "isotope notation (e.g., ^14, ^235)" +// isotope notation (e.g., ^14, ^235) 
+#let isotope-parser = seq( + char("^"), integer, + map: parts => { + let (_, num) = parts + num + } ) -#let charge-parser = label( - map( - seq(char("^"), optional(digit), choice(char("+"), char("-"))), - parts => { - let (_, d, sign) = parts - d + sign - } - ), - "charge notation (e.g., ^+, ^2-, ^3+)" +// charge notation (e.g., ^+, ^2-, ^3+) +#let charge-parser = seq( + char("^"), optional(digit), choice(char("+"), char("-")), + map: parts => { + let (_, d, sign) = parts + d + sign + } ) -#let element-group-parser = map( - seq(optional(isotope-parser), element-parser, optional(subscript-parser)), - parts => { +#let element-group-parser = seq( + optional(isotope-parser), element-parser, optional(integer), + map: parts => { let (isotope, element, subscript) = parts ( type: "element-group", @@ -198,61 +182,44 @@ } ) -#let abbreviation-parser = label( - map( - seq(lowercase, some(letter)), - parts => { - let (first, rest) = parts - (type: "abbreviation", value: first + rest.join()) - } - ), - "abbreviation (e.g., tBu, iPr)" -) - -#let math-text-parser = label( - map( - seq( - char("$"), - some(none-of("$")), - char("$") - ), - parts => { - let (_, chars, _) = parts - (type: "math-text", value: chars.join()) - } - ), - "math text notation (e.g., $\\Delta$, $\\mu$)" -) - -#let parenthetical-parser(atoms-parser) = label( - map( - seq( - char("("), - atoms-parser, - char(")"), - optional(subscript-parser) - ), - parts => { - let (_, atoms, _, subscript) = parts - (type: "parenthetical", atoms: atoms, subscript: subscript) - } - ), - "parenthetical group (e.g., (OH)2, (NH4)2)" -) - -#let complex-parser(atoms-parser) = label( - map( - seq( - char("["), - atoms-parser, - char("]") - ), - parts => { - let (_, atoms, _) = parts - (type: "complex", atoms: atoms) - } - ), - "complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+)" +// abbreviation (e.g., tBu, iPr) +#let abbreviation-parser = seq( + lowercase, some(letter), + map: parts => { + let (first, rest) = parts + (type: 
"abbreviation", value: first + rest.join()) + } +) + +// math text notation (e.g., $\\Delta$, $\\mu$) +#let math-text-parser = seq( + char("$"), some(none-of("$")), char("$"), + map: parts => { + let (_, chars, _) = parts + (type: "math-text", value: chars.join()) + } +) + +#let parenthetical-parser(atoms-parser) = seq( + char("("), + atoms-parser, + char(")"), + optional(integer), + map: parts => { + let (_, atoms, _, subscript) = parts + (type: "parenthetical", atoms: atoms, subscript: subscript) + } +) + +// complex notation (e.g., [Fe(CN)6]^3-, [Cu(NH3)4]^2+) +#let complex-parser(atoms-parser) = seq( + char("["), + atoms-parser, + char("]"), + map: parts => { + let (_, atoms, _) = parts + (type: "complex", atoms: atoms) + } ) #let atoms-part-parser(atoms-parser) = choice( @@ -264,15 +231,12 @@ #let atoms-parser() = { let self = lazy(() => atoms-parser()) - label( - map( - seq(some(atoms-part-parser(self)), optional(charge-parser)), - parts => { - let (parts, charge) = parts - (type: "atoms", parts: parts, charge: charge) - } - ), - "atoms composition" + seq( + some(atoms-part-parser(self)), optional(charge-parser), + map: parts => { + let (parts, charge) = parts + (type: "atoms", parts: parts, charge: charge) + } ) } @@ -310,20 +274,17 @@ } } -#let fragment-parser = label( - map( - seq(fragment-content-parser, optional(label-parser), optional(options-parser)), - parts => { - let (content, label, options) = parts - ( - type: "fragment", - atoms: process-atom(content), - name: label, - options: if options != none { options } else { (:) } - ) - } - ), - "molecular fragment" +#let fragment-parser = seq( + fragment-content-parser, optional(label-parser), optional(options-parser), + map: parts => { + let (content, label, options) = parts + ( + type: "fragment", + atoms: process-atom(content), + name: label, + options: if options != none { options } else { (:) } + ) + } ) // ==================== Bonds ==================== @@ -342,20 +303,17 @@ char("<") ) -#let 
bond-parser = label( - map( - seq(bond-symbol-parser, optional(bond-label-parser), optional(options-parser)), - parts => { - let (symbol, label, options) = parts - ( - type: "bond", - symbol: symbol, - name: label, - options: if options != none { options } else { (:) } - ) - } - ), - "chemical bond" +#let bond-parser = seq( + bond-symbol-parser, optional(bond-label-parser), optional(options-parser), + map: parts => { + let (symbol, label, options) = parts + ( + type: "bond", + symbol: symbol, + name: label, + options: if options != none { options } else { (:) } + ) + } ) // ==================== Rings ==================== @@ -376,32 +334,28 @@ } ) -#let ring-parser(mol-parser) = label( - map( - seq( - char("@"), ring-size-parser, - optional(seq(char("("), mol-parser, char(")"))), - optional(label-parser), - optional(options-parser) - ), - parts => { - let (_, faces, mol, lbl, opts) = parts +// ring notation (e.g., @6, @5(C-C-C-C-C)) +#let ring-parser(mol-parser) = seq( + char("@"), ring-size-parser, + optional(seq(char("("), mol-parser, char(")"))), + optional(label-parser), + optional(options-parser), + map: parts => { + let (_, faces, mol, lbl, opts) = parts - if type(mol) == array { - let (_, mol, _) = mol - } else { - mol = none - } - ( - type: "cycle", - faces: faces, - body: mol, - label: lbl, - options: opts - ) + if type(mol) == array { + let (_, mol, _) = mol + } else { + mol = none } - ), - "ring notation (e.g., @6, @5(C-C-C-C-C))" + ( + type: "cycle", + faces: faces, + body: mol, + label: lbl, + options: opts + ) + } ) // ==================== Molecules ==================== @@ -412,19 +366,19 @@ label-ref-parser ) -#let branch-parser(mol-parser) = map( - seq(char("("), bond-parser, mol-parser, char(")")), - parts => { +#let branch-parser(mol-parser) = seq( + char("("), bond-parser, mol-parser, char(")"), + map: parts => { let (_, bond, molecules, _) = parts (type: "branch", bond: bond, body: molecules) } ) -#let unit-parser(mol-parser) = map( - 
seq(optional(node-parser(mol-parser)), many(branch-parser(mol-parser))), - parts => { +#let unit-parser(mol-parser) = seq( + optional(node-parser(mol-parser)), many(branch-parser(mol-parser)), + map: parts => { let (node, branches) = parts - + // Handle label reference as a special unit type if node != none and node.type == "label-ref" { ( @@ -446,49 +400,36 @@ #let molecule-parser() = { let self = lazy(() => molecule-parser()) - label( - map( - seq( - unit-parser(self), - many(seq(bond-parser, unit-parser(self))) - ), - nodes => { - let (first, rest) = nodes - - // Check if molecule starts with a label reference - let is_continuation = first.at("is_continuation_start", default: false) - let continuation_label = if is_continuation and first.node.type == "label-ref" { - first.node.label - } else { - none - } - - ( - type: "molecule", - is_continuation: is_continuation, - continuation_label: continuation_label, - first: first, - rest: rest.map(unit => { - let (bond, unit) = unit - (bond: bond, unit: unit) - }) - ) + seq( + unit-parser(self), + many(seq(bond-parser, unit-parser(self))), + map: nodes => { + let (first, rest) = nodes + + // Check if molecule starts with a label reference + let is_continuation = first.at("is_continuation_start", default: false) + let continuation_label = if is_continuation and first.node.type == "label-ref" { + first.node.label + } else { + none } - ), - "molecule structure" + + ( + type: "molecule", + is_continuation: is_continuation, + continuation_label: continuation_label, + first: first, + rest: rest.map(unit => { + let (bond, unit) = unit + (bond: bond, unit: unit) + }) + ) + } ) } // ==================== Reactions ==================== -#let coefficient-parser = label( - map( - integer, - num => (type: "coefficient", value: num) - ), - "stoichiometric coefficient" -) - #let op-symbol-parser = choice( str("<=>"), str("-->"), @@ -501,20 +442,18 @@ math-text-parser ) -#let condition-parser = label( - map( - seq(char("["), 
many(none-of("]")), char("]")), - parts => { - let (_, chars, _) = parts - (type: "condition", text: chars.join()) - } - ), - "reaction condition (e.g., [heat], [catalyst])" +// reaction condition (e.g., [heat], [catalyst]) +#let condition-parser = seq( + char("["), many(none-of("]")), char("]"), + map: parts => { + let (_, chars, _) = parts + (type: "condition", text: chars.join()) + } ) -#let operator-parser = map( - seq(ws, optional(condition-parser), op-symbol-parser, optional(condition-parser), ws), - parts => { +#let operator-parser = seq( + ws, optional(condition-parser), op-symbol-parser, optional(condition-parser), ws, + map: parts => { let (_, cond1, op, cond2, _) = parts ( type: "operator", @@ -525,38 +464,32 @@ } ) -#let term-parser = label( - map( - seq(optional(coefficient-parser), molecule-parser()), - parts => { - let (coeff, mol) = parts - ( - type: "term", - coefficient: coeff, - molecule: mol - ) - } - ), - "reaction term" -) - -#let reaction-parser = label( - map( - seq(term-parser, many(seq(operator-parser, term-parser))), - parts => { - let (first, rest) = parts - let terms = (first,) - for (operator, term) in rest { - terms.push(operator) - terms.push(term) - } - ( - type: "reaction", - terms: terms - ) +#let term-parser = seq( + optional(integer), molecule-parser(), + map: parts => { + let (coeff, mol) = parts + ( + type: "term", + coefficient: coeff, + molecule: mol + ) + } +) + +#let reaction-parser = seq( + term-parser, many(seq(operator-parser, term-parser)), + map: parts => { + let (first, rest) = parts + let terms = (first,) + for (operator, term) in rest { + terms.push(operator) + terms.push(term) } - ), - "chemical reaction" + ( + type: "reaction", + terms: terms + ) + } ) // ==================== Parse Functions ==================== diff --git a/src/utils/parser-combinator.typ b/src/utils/parser-combinator.typ index 51a0df7..5e67adb 100644 --- a/src/utils/parser-combinator.typ +++ b/src/utils/parser-combinator.typ @@ -98,7 +98,7 @@ 
// Map result #let map(p, f) = parser("map", s => { - let r = (p.run)(s) + let r = (p.run)(s) // map if r.ok { ok(f(r.value), r.state) } else { @@ -107,7 +107,7 @@ }) // Sequence parsers (variadic) -#let seq(..parsers) = { +#let seq(..parsers, map: results => results) = { let ps = parsers.pos() if ps.len() == 0 { return parser("empty", s => ok((), s)) } if ps.len() == 1 { return ps.at(0) } @@ -117,13 +117,13 @@ let current = s for p in ps { - let r = (p.run)(current) + let r = (p.run)(current) // seq if not r.ok { return r } results.push(r.value) current = r.state } - ok(results, current) + ok(map(results), current) }) } @@ -135,7 +135,7 @@ parser("choice", s => { for p in ps { - let r = (p.run)(s) + let r = (p.run)(s) // choice if r.ok { return r } } err("no alternative matched", s) @@ -144,7 +144,7 @@ // Optional #let optional(p) = parser("optional", s => { - let r = (p.run)(s) + let r = (p.run)(s) // optional if r.ok { ok(r.value, r.state) } else { @@ -164,7 +164,7 @@ let current = s while true { - let r = (p.run)(current) + let r = (p.run)(current) // many if not r.ok { break } results.push(r.value) current = r.state @@ -175,7 +175,7 @@ // One or more #let some(p) = parser("some", s => { - let first = (p.run)(s) + let first = (p.run)(s) // some if not first.ok { return first } let rest = (many(p).run)(first.state) @@ -184,7 +184,7 @@ // Between delimiters #let between(left, right, p) = parser("between", s => { - let l = (left.run)(s) + let l = (left.run)(s) // between if not l.ok { return l } let m = (p.run)(l.state) @@ -198,14 +198,14 @@ // Separated by #let sep-by(p, separator) = parser("sep-by", s => { - let first = (p.run)(s) + let first = (p.run)(s) // sep-by if not first.ok { return ok((), s) } let results = (first.value,) let current = first.state while true { - let sep = (separator.run)(current) + let sep = (separator.run)(current) // sep-by if not sep.ok { break } let item = (p.run)(sep.state) @@ -220,7 +220,7 @@ // Validate parsed value #let 
validate(p, validator) = parser("validate", s => { - let result = (p.run)(s) + let result = (p.run)(s) // validate if not result.ok { return result } let (valid, error-msg) = validator(result.value) @@ -233,7 +233,7 @@ // Separated by (at least one) #let sep-by1(p, separator) = parser("sep-by1", s => { - let first = (p.run)(s) + let first = (p.run)(s) // sep-by1 if not first.ok { return first } let rest = (sep-by(p, separator).run)(first.state) @@ -250,7 +250,7 @@ let current = s for i in range(n) { - let r = (p.run)(current) + let r = (p.run)(current) // count if not r.ok { return err("expected " + repr(n) + " items, got " + repr(i), current) } results.push(r.value) current = r.state @@ -261,7 +261,7 @@ // Lookahead - check without consuming #let lookahead(p) = parser("lookahead", s => { - let r = (p.run)(s) + let r = (p.run)(s) // lookahead if r.ok { ok(r.value, s) // Don't advance } else { @@ -271,7 +271,7 @@ // Negative lookahead #let not-ahead(p) = parser("not", s => { - let r = (p.run)(s) + let r = (p.run)(s) // not if r.ok { err("unexpected " + repr(r.value), s) } else { @@ -281,12 +281,12 @@ // Attempt - backtrack on failure #let attempt(p) = parser("attempt", s => { - (p.run)(s) + (p.run)(s) // attempt }) // Label for better errors #let label(p, lbl) = parser(lbl, s => { - let r = (p.run)(s) + let r = (p.run)(s) // label if not r.ok { // Create a more descriptive error message let context_str = if s.pos < s.len { @@ -303,7 +303,7 @@ // Chain left - for left-associative operators #let chainl(p, op, default: none) = parser("chainl", s => { - let first = (p.run)(s) + let first = (p.run)(s) // chainl if not first.ok { if default != none { return ok(default, s) @@ -315,10 +315,10 @@ let current = first.state while true { - let o = (op.run)(current) + let o = (op.run)(current) // chainl if not o.ok { break } - let next = (p.run)(o.state) + let next = (p.run)(o.state) // chainl if not next.ok { break } acc = (o.value)(acc, next.value) @@ -330,15 +330,15 @@ // 
Lazy parser - defers evaluation until needed #let lazy(thunk) = parser("lazy", s => { - let p = thunk() - (p.run)(s) + let p = thunk() // lazy + (p.run)(s) // lazy }) // Run parser #let parse(p, input) = { let s = state(input) - let r = (p.run)(s) + let r = (p.run)(s) // parse ( success: r.ok, value: if r.ok { r.value } else { none }, From d44eb381ee04de97004e27edcf15c9e5e8ce873d Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:47:30 +0900 Subject: [PATCH 20/30] feat: Add deeply nested structure test --- tests/molecule-edge-cases/test.typ | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ index 50ae0a2..e9dbfef 100644 --- a/tests/molecule-edge-cases/test.typ +++ b/tests/molecule-edge-cases/test.typ @@ -63,8 +63,24 @@ #test-parse("CH3-OH)", "Extra closing in chain") // Deeply nested structure -#test-parse("C(-C(-C(-C(-C(-OH)))))", "Deeply nested (5 levels)") -#test-parse("C(-(-(-(-OH))))", "Multiple nested empty branches") +#test-parse("-(-(-(-OH)))", "Deeply nested (3 levels)") +#test-parse("-(-(-(-(-OH))))", "Deeply nested (4 levels)") +#test-parse("-(-(-(-(-(-OH)))))", "Deeply nested (5 levels)") +#test-parse("-(-(-(-(-(-(-OH))))))", "Deeply nested (6 levels)") +#test-parse("-(-(-(-(-(-(-(-OH)))))))", "Deeply nested (7 levels)") +#test-parse("-(-(-(-(-(-(-(-(-OH))))))))", "Deeply nested (8 levels)") +#test-parse("-(-(-(-(-(-(-(-(-(-OH)))))))))", "Deeply nested (9 levels)") +#test-parse("-(-(-(-(-(-(-(-(-(-(-OH))))))))))", "Deeply nested (10 levels)") +#test-parse("-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))", "Deeply nested (11 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))", "Deeply nested (12 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))", "Deeply nested (13 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))", "Deeply nested (14 
levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))", "Deeply nested (15 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))", "Deeply nested (16 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))))", "Deeply nested (17 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))))", "Deeply nested (18 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))))))", "Deeply nested (19 levels)") +// #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))))))", "Deeply nested (20 levels)") // Complex branching patterns #test-parse("C()()()()", "Empty branches") From 2f6c2aab228c1be837115c477833391bad077343 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:59:19 +0900 Subject: [PATCH 21/30] fix: fix incorrect angle calculation --- src/elements/molecule/iupac-angle.typ | 269 ++++++++++++++------------ src/elements/molecule/parser.typ | 7 +- src/elements/molecule/transformer.typ | 33 +++- 3 files changed, 172 insertions(+), 137 deletions(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index cfa8f97..fa7019f 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -2,12 +2,12 @@ // relative angles #let IUPAC_ANGLES = ( - main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg }, + main_chain_initial: chain_length => if chain_length >= 2 { -30deg } else { -60deg }, zigzag_up: 60deg, zigzag_down: -60deg, incoming: -180deg, - sp3: (60deg, -60deg, -120deg, -180deg), + sp3: (60deg, -40deg, -80deg, -180deg), sp2: (60deg, -60deg, -180deg), sp: (0deg, -180deg), angles: n => range(n).map(i => 180deg - 360deg / n * (i + 1)), @@ -15,151 +15,164 @@ ring: n => range(n).map(i => 180deg - 360deg / n * (i + 1)), ) -#let get_hybridization(bonds) = { - let types = bonds.map(b => 
b.at("symbol", default: "-")) - if "#" in types or types.filter(t => t == "#").len() >= 2 { "sp" } - else if "=" in types { "sp2" } - else { "sp3" } +#let hybridization_angles(bonds) = { + let n = bonds.len() + let triple = bonds.filter(b => b.symbol == "#").len() + let double = bonds.filter(b => b.symbol == "=").len() + let other = bonds.filter(b => b.symbol != "#" and b.symbol != "=").len() + + if n == 2 and (triple >= 1 or double >= 2) { IUPAC_ANGLES.sp } + else if n <= 3 and (double >= 1 or other >= 2) { IUPAC_ANGLES.sp2 } + else if n <= 4 { IUPAC_ANGLES.sp3 } + else { (IUPAC_ANGLES.angles)(n) } } -#let calc_unit_angles(unit, prev_bond, next_bond, current_angle) = { +#let calc_unit_angles(unit, prev_bond, next_bond, current_angle, index) = { let branches = unit.at("branches", default: ()) - // Count all bonds - let n = branches.len() + if prev_bond != none { 1 } else { 0 } + if next_bond != none { 1 } else { 0 } - - let bonds = () + if branches.len() == 0 { return () } + + let bonds = branches.map(b => b.bond) if prev_bond != none { bonds.push(prev_bond) } if next_bond != none { bonds.push(next_bond) } + + let angles = hybridization_angles(bonds).filter( + angle => angle != IUPAC_ANGLES.incoming + and (calc.rem(index, 2) == 0 or angle != IUPAC_ANGLES.zigzag_up) + and (calc.rem(index, 2) == 1 or angle != IUPAC_ANGLES.zigzag_down) + ) - let hyb = get_hybridization(bonds) - let angles = if hyb == "sp3" and n == 4 { IUPAC_ANGLES.sp3 } - else if hyb == "sp2" and n == 3 { IUPAC_ANGLES.sp2 } - else if hyb == "sp" and n == 2 { IUPAC_ANGLES.sp } - else { (IUPAC_ANGLES.angles)(n) } + return angles +} + +// Process all branches of a unit with their calculated angles +#let apply_angles_to_branches(branches, branch_angles, calculate_angles_fn) = { + if branches == none or branches.len() == 0 { return () } - // Calculate branch angles - let branch_angles = () - let angle_idx = 0 + let new_branches = () + for (idx, branch) in branches.enumerate() { + if idx < 
branch_angles.len() { + new_branches.push(( + type: "branch", + bond: branch.bond + (relative: branch_angles.at(idx)), + body: calculate_angles_fn(branch.body) + )) + } else { + new_branches.push(branch) + } + } + return new_branches +} + +// Create a new unit with updated branches +#let create_unit_with_branch_angles(unit, branch_angles, calculate_angles_fn) = { + if unit == none { return unit } - if prev_bond != none { angle_idx += 1 } // Skip incoming angle - if next_bond != none { angle_idx += 1 } // Skip outgoing angle + let branches = unit.at("branches", default: ()) + if branches == none or branches.len() == 0 { return unit } - for _ in branches { - branch_angles.push(current_angle + angles.at(calc.rem(angle_idx, angles.len()))) - angle_idx += 1 + let new_branches = apply_angles_to_branches(branches, branch_angles, calculate_angles_fn) + return unit + (branches: new_branches) +} + +// Calculate angle for main chain position (zigzag pattern) +#let get_next_chain_angle(current_angle, position) = { + let angle_delta = if calc.rem(position, 2) == 0 { + IUPAC_ANGLES.zigzag_up + } else { + IUPAC_ANGLES.zigzag_down } - - return branch_angles + return current_angle + angle_delta } -// Main angle calculation -#let calculate_angles(molecule) = { - if molecule == none or molecule.type != "molecule" { return molecule } - - let chain_length = if molecule.rest != none { molecule.rest.len() } else { 0 } - let current_angle = (IUPAC_ANGLES.main_chain_initial)(chain_length) - - // Create new first unit with angles - let new_first = molecule.first - if molecule.first != none { - let unit = molecule.first - let next_bond = if molecule.rest != none and molecule.rest.len() > 0 { - molecule.rest.at(0).bond - } else { none } - - let branch_angles = calc_unit_angles(unit, none, next_bond, current_angle) - - // Create new branches with angles - let new_branches = () - let branches = unit.at("branches", default: ()) - if branches != none and branches.len() > 0 { - for (b_idx, branch) 
in branches.enumerate() { - if b_idx < branch_angles.len() { - // Create new bond with angle - let new_bond = if branch.bond != none { - branch.bond + (relative: branch_angles.at(b_idx)) - } else { branch.bond } - - // Recursively calculate angles for branch body - let new_body = if branch.at("body", default: none) != none { - calculate_angles(branch.body) - } else { branch.at("body", default: none) } - - // Create new branch with updated bond and body - let new_branch = ( - type: branch.type, - bond: new_bond, - body: new_body - ) - new_branches.push(new_branch) - } else { - new_branches.push(branch) - } - } - } - new_first = unit + (branches: new_branches) +// Process the first unit of the molecule +#let process_first_unit(unit, next_bond, initial_angle, calculate_angles_fn, root: false) = { + if unit == none { return unit } + + let branch_angles = calc_unit_angles(unit, none, next_bond, initial_angle, 0) + if root { + branch_angles = branch_angles.map(angle => angle + 180deg) } + return create_unit_with_branch_angles(unit, branch_angles, calculate_angles_fn) +} + +// Process a single rest unit +#let process_rest_unit(item, index, rest, current_angle, calculate_angles_fn) = { + // Calculate bond angle for this position + let new_angle = get_next_chain_angle(current_angle, index) + + // Update bond with absolute angle + let new_bond = item.bond + (absolute: new_angle) + + // Determine next bond for angle calculation + let next_bond = if index + 1 < rest.len() { + rest.at(index + 1).bond + } else { none } + + // Calculate branch angles for this unit + let branch_angles = calc_unit_angles( + item.unit, + new_bond, + next_bond, + new_angle, + index + ) + + // Create updated unit + let new_unit = create_unit_with_branch_angles(item.unit, branch_angles, calculate_angles_fn) + + return (bond: new_bond, unit: new_unit, angle: new_angle) +} + +// Process all rest units in the chain +#let process_rest_chain(rest, initial_angle, calculate_angles_fn) = { + if rest == none or 
rest.len() == 0 { return () } - // Process rest and create new rest array let new_rest = () - if molecule.rest != none { - for (r_idx, item) in molecule.rest.enumerate() { - current_angle += if calc.rem(r_idx, 2) == 0 { - IUPAC_ANGLES.zigzag_up - } else { - IUPAC_ANGLES.zigzag_down - } - - // Create new bond with absolute angle for main chain - let new_bond = item.bond + (absolute: current_angle) - - let unit = item.unit - let prev_bond = new_bond - let next_bond = if r_idx + 1 < molecule.rest.len() { - molecule.rest.at(r_idx + 1).bond - } else { none } - - let branch_angles = calc_unit_angles(unit, prev_bond, next_bond, current_angle) - - // Create new unit with branch angles - let new_unit = unit - let branches = unit.at("branches", default: ()) - if unit != none and branches != none and branches.len() > 0 { - let new_branches = () - for (b_idx, branch) in branches.enumerate() { - if b_idx < branch_angles.len() { - // Create new bond with angle - let new_bond = if branch.bond != none { - branch.bond + (relative: branch_angles.at(b_idx)) - } else { branch.bond } - - // Recursively calculate angles for branch body - let new_body = if branch.at("body", default: none) != none { - calculate_angles(branch.body) - } else { branch.at("body", default: none) } - - // Create new branch with updated bond and body - let new_branch = ( - type: branch.type, - bond: new_bond, - body: new_body - ) - new_branches.push(new_branch) - } else { - new_branches.push(branch) - } - } - new_unit = unit + (branches: new_branches) - } - - new_rest.push((bond: new_bond, unit: new_unit)) - } + let current_angle = initial_angle + + for (idx, item) in rest.enumerate() { + let processed = process_rest_unit(item, idx, rest, current_angle, calculate_angles_fn) + current_angle = processed.angle + new_rest.push((bond: processed.bond, unit: processed.unit)) } - // Return new molecule with angles + return new_rest +} + +// Main angle calculation - orchestrates the refactored helpers +#let 
calculate_angles(molecule) = { + // Validate input + if molecule == none or molecule.type != "molecule" { + return molecule + } + + // Calculate initial angle based on chain length + let chain_length = molecule.rest.len() + let initial_angle = (IUPAC_ANGLES.main_chain_initial)(chain_length) + + // Process first unit + let next_bond = if molecule.rest.len() > 0 { + molecule.rest.at(0).bond + } else { none } + + let new_first = process_first_unit( + molecule.first, + next_bond, + initial_angle, + calculate_angles, + root: true + ) + + // Process rest of the chain + let new_rest = process_rest_chain( + molecule.rest, + initial_angle, + calculate_angles + ) + return ( type: "molecule", first: new_first, rest: new_rest ) -} \ No newline at end of file +} diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index bd40859..45395cf 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -342,12 +342,7 @@ optional(options-parser), map: parts => { let (_, faces, mol, lbl, opts) = parts - - if type(mol) == array { - let (_, mol, _) = mol - } else { - mol = none - } + mol = if mol != none { let (_, mol, _) = mol; mol } else { () } ( type: "cycle", faces: faces, diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 39ff1c5..01263b4 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -46,7 +46,15 @@ single } - bond-fn(absolute: absolute, relative: relative, name: name, ..options) + if absolute != none and relative != none { + bond-fn(relative: relative, absolute: absolute, name: name, ..options) + } else if relative != none { + bond-fn(relative: relative, name: name, ..options) + } else if absolute != none { + bond-fn(absolute: absolute, name: name, ..options) + } else { + bond-fn(name: name, ..options) + } } #let transform_branch(branch, transform_molecule) = { @@ -62,10 +70,15 @@ } #let transform_cycle(cycle, transform_molecule) = 
{ + let body = transform_molecule(cycle.body) + if body.len() > 0 and body.at(0).type == "fragment" { + body = body.slice(1) + } + return ( type: "cycle", faces: cycle.faces, - body: if cycle.body != none { transform_molecule(cycle.body) } else { none }, + body: body, args: (:), ) } @@ -177,10 +190,24 @@ // Resolve any label references in the transformed molecule resolve_label_references(transformed) } else if term.type == "operator" { + let op = if term.op == "->" { + sym.arrow.r + } else if term.op == "<->" { + sym.arrow.l.r + } else if term.op == "<=>" { + sym.harpoons.ltrb + } else { + eval("$" + term.op + "$") + } + op = math.attach( + math.stretch(op, size: 100% + 2em), + t: [#term.condition-before], b: [#term.condition-after] + ) + (( type: "operator", name: none, - op: eval("$" + term.op + "$"), + op: op, margin: 0.7em, ),) } else { From d429deeff9771567fcc67a030eac8d26cc84286a Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 19 Sep 2025 07:21:11 +0900 Subject: [PATCH 22/30] refactor: transform by 1 recursion pass --- src/elements/molecule/generator.typ | 139 +++++++++++ src/elements/molecule/iupac-angle.typ | 186 ++++----------- src/elements/molecule/merger.typ | 261 --------------------- src/elements/molecule/parser.typ | 10 +- src/elements/molecule/transformer.typ | 316 +++++++++++--------------- tests/molecule-integration/test.typ | 12 +- 6 files changed, 319 insertions(+), 605 deletions(-) create mode 100644 src/elements/molecule/generator.typ delete mode 100644 src/elements/molecule/merger.typ diff --git a/src/elements/molecule/generator.typ b/src/elements/molecule/generator.typ new file mode 100644 index 0000000..239d308 --- /dev/null +++ b/src/elements/molecule/generator.typ @@ -0,0 +1,139 @@ +#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left, cram-hollow-right, cram-hollow-left + +// ============================ Molecule 
============================ + +#let generate_fragment(node) = ( + type: "fragment", + atoms: node.atoms, + name: node.at("name", default: none), + links: node.at("links", default: (:)), + lewis: node.options.at("lewis", default: ()), + vertical: node.options.at("vertical", default: false), + count: node.atoms.len(), + colors: node.options.at("colors", default: none), + label: node.at("name", default: none), + ..node.options, +) + +#let generate_bond(bond, angle) = { + let symbol = bond.symbol + let name = bond.at("name", default: none) + let absolute = if angle != none { angle } else { bond.at("absolute", default: none) } + let relative = bond.at("relative", default: none) + let options = bond.options + + let bond-fn = if symbol == "-" { + single + } else if symbol == "=" { + double + } else if symbol == "#" { + triple + } else if symbol == ">" { + cram-filled-right + } else if symbol == "<" { + cram-filled-left + } else if symbol == ":>" { + cram-dashed-right + } else if symbol == "<:" { + cram-dashed-left + } else if symbol == "|>" { + cram-hollow-right + } else if symbol == "<|" { + cram-hollow-left + } else { + single + } + + if absolute != none and relative != none { + bond-fn(relative: relative, absolute: absolute, name: name, ..options) + } else if relative != none { + bond-fn(relative: relative, name: name, ..options) + } else if absolute != none { + bond-fn(absolute: absolute, name: name, ..options) + } else { + bond-fn(name: name, ..options) + } +} + +#let generate_branch(bond, body) = { + return ( + type: "branch", + body: (..bond, ..body), + args: (:), + ) +} + +#let generate_cycle(cycle, body) = { + return ( + type: "cycle", + faces: cycle.faces, + body: body, + args: (:), + ) +} + +#let generate_label_reference(label) = { +} + +#let generate_molecule(molecule) = { + if molecule == none { return () } + if type(molecule) == array { return molecule } + if molecule.type != "molecule" { return () } + + let elements = () + elements += 
generate_unit(molecule.first) + for item in molecule.rest { + elements += generate_bond(item.bond) + elements += generate_unit(item.unit) + } + return elements +} + +// ============================ Reaction ============================ + +#let generate_term(ctx, molecule) = { + if molecule.type != "molecule" { + return molecule + } + + let transformed = generate_molecule(molecule_with_angles) + return generate_label_references(transformed) +} + +#let generate_operator(operator) = { + let op = if operator.op == "->" { + sym.arrow.r + } else if operator.op == "<->" { + sym.arrow.l.r + } else if operator.op == "<=>" { + sym.harpoons.ltrb + } else { + eval("$" + operator.op + "$") + } + + op = math.attach( + math.stretch(op, size: 100% + 2em), + t: [#term.condition-before], b: [#term.condition-after] + ) + + return ( + type: "operator", + name: none, + op: op, + margin: 0.7em, + ) +} + +#let generate_reaction(reaction) = { + reaction.terms.map(term => { + if term.type == "term" { + generate_term(term.molecule) + } else if term.type == "operator" { + (generate_operator(term),) + } else { + panic("Unknown term type: " + term.type) + } + }).join() +} + +#let generate = generate_reaction diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index fa7019f..1e8f0ba 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -2,177 +2,71 @@ // relative angles #let IUPAC_ANGLES = ( - main_chain_initial: chain_length => if chain_length >= 2 { -30deg } else { -60deg }, - zigzag_up: 60deg, - zigzag_down: -60deg, + main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg } - 60deg, + zigzag: idx => if calc.rem(idx, 2) == 1 { 60deg } else { -60deg }, incoming: -180deg, - sp3: (60deg, -40deg, -80deg, -180deg), + sp3: (60deg, -60deg, -120deg, -180deg), sp2: (60deg, -60deg, -180deg), sp: (0deg, -180deg), - angles: n => range(n).map(i => 180deg - 360deg / n * (i + 1)), - ring: n => 
range(n).map(i => 180deg - 360deg / n * (i + 1)), + branch_angles: (n, idx) => 180deg - (idx + 1) * 360deg / n, + cycle_edge_angles: (n, idx) => -90deg + (idx + 1) * 360deg / n, + cycle_branch_angles: (n, idx) => -180deg + (idx + 1/2) * 360deg / n, ) -#let hybridization_angles(bonds) = { +// Calculate the angles for the hybridization of the bonds +#let hybridization_angles(bonds, branches_len) = { let n = bonds.len() let triple = bonds.filter(b => b.symbol == "#").len() let double = bonds.filter(b => b.symbol == "=").len() let other = bonds.filter(b => b.symbol != "#" and b.symbol != "=").len() if n == 2 and (triple >= 1 or double >= 2) { IUPAC_ANGLES.sp } - else if n <= 3 and (double >= 1 or other >= 2) { IUPAC_ANGLES.sp2 } - else if n <= 4 { IUPAC_ANGLES.sp3 } - else { (IUPAC_ANGLES.angles)(n) } + else if branches_len <= 1 and (double >= 1 or other >= 2) { IUPAC_ANGLES.sp2 } + else if branches_len <= 2 { IUPAC_ANGLES.sp3 } + else { (IUPAC_ANGLES.branch_angles)(n) } } -#let calc_unit_angles(unit, prev_bond, next_bond, current_angle, index) = { - let branches = unit.at("branches", default: ()) - if branches.len() == 0 { return () } +#let process_bond(ctx, bond) = { + let (n, idx) = ctx.position.last() - let bonds = branches.map(b => b.bond) - if prev_bond != none { bonds.push(prev_bond) } - if next_bond != none { bonds.push(next_bond) } + let angle = if ctx.parent_type == "unit" or ctx.parent_type == none { + // if n == 2 { + // panic(ctx, bond) + // } + ctx.current_angle + (IUPAC_ANGLES.zigzag)(idx) + } else if ctx.parent_type == "cycle" { + ctx.current_angle + (IUPAC_ANGLES.cycle_edge_angles)(n, idx) + } else if ctx.parent_type == "branch" { + ctx.current_angle + (IUPAC_ANGLES.branch_angles)(n, idx) + } else { + panic("Unknown parent type: " + ctx.parent_type) + } - let angles = hybridization_angles(bonds).filter( - angle => angle != IUPAC_ANGLES.incoming - and (calc.rem(index, 2) == 0 or angle != IUPAC_ANGLES.zigzag_up) - and (calc.rem(index, 2) == 1 or angle != 
IUPAC_ANGLES.zigzag_down) - ) - - return angles + return (ctx + (current_angle: angle), angle) } -// Process all branches of a unit with their calculated angles -#let apply_angles_to_branches(branches, branch_angles, calculate_angles_fn) = { - if branches == none or branches.len() == 0 { return () } - - let new_branches = () - for (idx, branch) in branches.enumerate() { - if idx < branch_angles.len() { - new_branches.push(( - type: "branch", - bond: branch.bond + (relative: branch_angles.at(idx)), - body: calculate_angles_fn(branch.body) - )) - } else { - new_branches.push(branch) - } - } - return new_branches -} +#let process_branch(ctx, unit) = { + let (n, idx) = ctx.position.last() -// Create a new unit with updated branches -#let create_unit_with_branch_angles(unit, branch_angles, calculate_angles_fn) = { - if unit == none { return unit } - let branches = unit.at("branches", default: ()) - if branches == none or branches.len() == 0 { return unit } - - let new_branches = apply_angles_to_branches(branches, branch_angles, calculate_angles_fn) - return unit + (branches: new_branches) -} - -// Calculate angle for main chain position (zigzag pattern) -#let get_next_chain_angle(current_angle, position) = { - let angle_delta = if calc.rem(position, 2) == 0 { - IUPAC_ANGLES.zigzag_up - } else { - IUPAC_ANGLES.zigzag_down - } - return current_angle + angle_delta -} + if branches.len() == 0 { return () } -// Process the first unit of the molecule -#let process_first_unit(unit, next_bond, initial_angle, calculate_angles_fn, root: false) = { - if unit == none { return unit } - - let branch_angles = calc_unit_angles(unit, none, next_bond, initial_angle, 0) - if root { - branch_angles = branch_angles.map(angle => angle + 180deg) - } - return create_unit_with_branch_angles(unit, branch_angles, calculate_angles_fn) -} + let bonds = branches.map(b => b.bond) + if ctx.prev_bond != none { bonds.push(ctx.prev_bond) } + if ctx.next_bond != none { bonds.push(ctx.next_bond) } -// 
Process a single rest unit -#let process_rest_unit(item, index, rest, current_angle, calculate_angles_fn) = { - // Calculate bond angle for this position - let new_angle = get_next_chain_angle(current_angle, index) - - // Update bond with absolute angle - let new_bond = item.bond + (absolute: new_angle) - - // Determine next bond for angle calculation - let next_bond = if index + 1 < rest.len() { - rest.at(index + 1).bond - } else { none } - - // Calculate branch angles for this unit - let branch_angles = calc_unit_angles( - item.unit, - new_bond, - next_bond, - new_angle, - index + let angles = hybridization_angles(bonds, branches.len()).filter( + angle => (ctx.prev_bond == none or ctx.next_bond == none + or angle != IUPAC_ANGLES.incoming + or angle != IUPAC_ANGLES.zigzag(index)) ) - - // Create updated unit - let new_unit = create_unit_with_branch_angles(item.unit, branch_angles, calculate_angles_fn) - - return (bond: new_bond, unit: new_unit, angle: new_angle) -} -// Process all rest units in the chain -#let process_rest_chain(rest, initial_angle, calculate_angles_fn) = { - if rest == none or rest.len() == 0 { return () } - - let new_rest = () - let current_angle = initial_angle - - for (idx, item) in rest.enumerate() { - let processed = process_rest_unit(item, idx, rest, current_angle, calculate_angles_fn) - current_angle = processed.angle - new_rest.push((bond: processed.bond, unit: processed.unit)) + // first branches of the main chain + if ctx.prev_bond == none and ctx.parent_type == none { + angles = angles.map(angle => angle + 180deg) } - return new_rest -} - -// Main angle calculation - orchestrates the refactored helpers -#let calculate_angles(molecule) = { - // Validate input - if molecule == none or molecule.type != "molecule" { - return molecule - } - - // Calculate initial angle based on chain length - let chain_length = molecule.rest.len() - let initial_angle = (IUPAC_ANGLES.main_chain_initial)(chain_length) - - // Process first unit - let next_bond 
= if molecule.rest.len() > 0 { - molecule.rest.at(0).bond - } else { none } - - let new_first = process_first_unit( - molecule.first, - next_bond, - initial_angle, - calculate_angles, - root: true - ) - - // Process rest of the chain - let new_rest = process_rest_chain( - molecule.rest, - initial_angle, - calculate_angles - ) - - return ( - type: "molecule", - first: new_first, - rest: new_rest - ) + return angles } diff --git a/src/elements/molecule/merger.typ b/src/elements/molecule/merger.typ deleted file mode 100644 index d475f11..0000000 --- a/src/elements/molecule/merger.typ +++ /dev/null @@ -1,261 +0,0 @@ -// Merger logic for label continuation system - -#import "parser.typ": molecule-parser, parse - -// Parse multiple molecular parts separated by whitespace -#let parse-molecular-parts(input) = { - // Split by whitespace while preserving structure - let parts = () - let current = "" - let depth = 0 - - for char in input { - if char == "(" { depth += 1 } - else if char == ")" { depth -= 1 } - - if depth == 0 and (char == " " or char == "\t" or char == "\n") { - if current != "" { - parts.push(current) - current = "" - } - } else { - current += char - } - } - - if current != "" { - parts.push(current) - } - - // Parse each part - let molecules = () - for part in parts { - let result = parse(molecule-parser(), part) - if result.success { - molecules.push(result.value) - } - } - - return molecules -} - -// Separate main molecule from continuations -#let separate-molecules(molecules) = { - let main = none - let continuations = () - - for mol in molecules { - if mol.at("is_continuation", default: false) { - continuations.push(mol) - } else if main == none { - main = mol - } else { - // Multiple main molecules - merge them sequentially - // This could be extended to handle multiple main molecules - panic("Multiple main molecules found - only one non-continuation molecule allowed") - } - } - - if main == none { - panic("No main molecule found - at least one 
non-continuation molecule required") - } - - return (main: main, continuations: continuations) -} - -// Build registry of labeled fragments in the molecule -#let build-label-registry(mol) = { - let visit(mol, path, registry) = { - if mol == none { return registry } - - let result = registry - - // Process first unit - if mol.at("first", default: none) != none { - let unit = mol.first - let unit_path = path + ((type: "first"),) - - // Check if node has a label - if unit.node != none and unit.node.type == "fragment" { - if unit.node.at("name", default: none) != none { - result.insert(unit.node.name, (unit: unit, path: unit_path)) - } - } - - // Process branches of first unit - if unit.at("branches", default: ()) != none { - for (idx, branch) in unit.branches.enumerate() { - if branch.at("body", default: none) != none { - result = visit(branch.body, unit_path + ((type: "branch", idx: idx),), result) - } - } - } - } - - // Process rest units - if mol.at("rest", default: ()) != none { - for (idx, item) in mol.rest.enumerate() { - if item.at("unit", default: none) != none { - let unit = item.unit - let unit_path = path + ((type: "rest", idx: idx),) - - // Check if node has a label - if unit.node != none and unit.node.type == "fragment" { - if unit.node.at("name", default: none) != none { - result.insert(unit.node.name, (unit: unit, path: unit_path)) - } - } - - // Process branches - if unit.at("branches", default: ()) != none { - for (b_idx, branch) in unit.branches.enumerate() { - if branch.at("body", default: none) != none { - result = visit(branch.body, unit_path + ((type: "branch", idx: b_idx),), result) - } - } - } - } - } - } - - return result - } - - return visit(mol, (), (:)) -} - -// Create a molecule structure from a continuation -#let create-molecule-from-continuation(cont) = { - // Remove the label reference from the first unit - let new_first = if cont.rest.len() > 0 { - cont.rest.at(0).unit - } else { - (type: "unit", node: (type: "implicit"), branches: ()) 
- } - - let new_rest = if cont.rest.len() > 1 { - cont.rest.slice(1) - } else { - () - } - - ( - type: "molecule", - first: new_first, - rest: new_rest - ) -} - -// Add branch to a unit at the given path in the molecule -#let add-branch-at-path(mol, path, branch) = { - if path.len() == 0 { return mol } - - let step = path.at(0) - let remaining_path = path.slice(1) - - if step.type == "first" { - if remaining_path.len() == 0 { - // We're at the target unit - if mol.first.at("branches", default: none) == none { - mol.first.branches = () - } - mol.first.branches.push(branch) - } else { - // Continue deeper - mol.first = add-branch-to-unit(mol.first, remaining_path, branch) - } - } else if step.type == "rest" { - let idx = step.idx - if remaining_path.len() == 0 { - // We're at the target unit - if mol.rest.at(idx).unit.at("branches", default: none) == none { - mol.rest.at(idx).unit.branches = () - } - mol.rest.at(idx).unit.branches.push(branch) - } else { - // Continue deeper - mol.rest.at(idx).unit = add-branch-to-unit(mol.rest.at(idx).unit, remaining_path, branch) - } - } - - return mol -} - -// Helper to add branch to a unit with nested branches -#let add-branch-to-unit(unit, path, branch) = { - if path.len() == 0 { - if unit.at("branches", default: none) == none { - unit.branches = () - } - unit.branches.push(branch) - return unit - } - - let step = path.at(0) - if step.type == "branch" { - let idx = step.idx - let remaining_path = path.slice(1) - unit.branches.at(idx).body = add-branch-at-path(unit.branches.at(idx).body, remaining_path, branch) - } - - return unit -} - -// Merge continuations into the main molecule -#let merge-continuations(main, continuations) = { - let registry = build-label-registry(main) - let result = main - - for cont in continuations { - let target_label = cont.continuation_label - - if target_label not in registry { - panic("Label '" + target_label + "' not found in main molecule") - } - - let target_info = registry.at(target_label) - let 
target_path = target_info.path - - // Get the first bond from the continuation - let cont_bond = if cont.rest.len() > 0 { - cont.rest.at(0).bond - } else { - (type: "bond", symbol: "-") // Default to single bond - } - - // Create branch from continuation - let branch_body = create-molecule-from-continuation(cont) - - let branch = ( - type: "branch", - bond: cont_bond, - body: branch_body - ) - - // Add branch at the target path - result = add-branch-at-path(result, target_path, branch) - } - - return result -} - -// Main entry point for parsing with continuations -#let parse-with-continuations(input) = { - // Parse all molecular parts - let molecules = parse-molecular-parts(input) - - if molecules.len() == 0 { - panic("No valid molecules found in input") - } - - if molecules.len() == 1 { - // Single molecule, no continuations needed - return molecules.at(0) - } - - // Separate main and continuations - let separated = separate-molecules(molecules) - - // Merge continuations into main structure - return merge-continuations(separated.main, separated.continuations) -} \ No newline at end of file diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 45395cf..a368eb7 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -265,10 +265,10 @@ math.attach(parts.element, tl: [#parts.isotope], br: [#parts.subscript]) } else if type == "parenthetical" { let inner = process-atom(parts.atoms) - math.attach([(#inner)], br: [#parts.subscript]) + math.attach([(#inner.join())], br: [#parts.subscript]) } else if type == "complex" { let inner = process-atom(parts.atoms) - [\[#inner\]] + [\[#inner.join()\]] } else { "unkown type: " + type } @@ -342,7 +342,10 @@ optional(options-parser), map: parts => { let (_, faces, mol, lbl, opts) = parts - mol = if mol != none { let (_, mol, _) = mol; mol } else { () } + mol = if mol != none { + let (_, mol, _) = mol + mol + } ( type: "cycle", faces: faces, @@ -530,6 +533,5 @@ ) } - // 
Success - all input was consumed return reaction_result } diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 01263b4..9ecb47b 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,219 +1,165 @@ -#import "iupac-angle.typ": calculate_angles -#import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left, cram-hollow-right, cram-hollow-left - -#let transform_fragment(node) = { - let atoms = node.atoms - ( - type: "fragment", - atoms: atoms, - name: node.at("name", default: none), - links: node.at("links", default: (:)), - lewis: node.options.at("lewis", default: ()), - vertical: node.options.at("vertical", default: false), - count: atoms.len(), - colors: node.options.at("colors", default: none), - label: node.at("name", default: none), - ..node.options, - ) +#import "iupac-angle.typ": process_bond, IUPAC_ANGLES +#import "generator.typ": * + +#let init_state() = ( + // Position and angle information + position: (), // Position in the molecule + parent_type: none, // Parent structure type + prev_bond: none, // Previous bond information + next_bond: none, // Next bond information (for lookahead) + current_angle: 0deg, // Current absolute angle + visited_labels: (), // Visited labels (prevent circular references) + label_table: (:), // Label table for references +) + +// ============================ Molecule ============================ + +#let transform_fragment(ctx, node) = { + generate_fragment(node) } -#let transform_bond(bond) = { - let symbol = bond.symbol - let name = bond.at("name", default: none) - let absolute = bond.at("absolute", default: none) - let relative = bond.at("relative", default: none) - let options = bond.options - - let bond-fn = if symbol == "-" { - single - } else if symbol == "=" { - double - } else if symbol == "#" { - triple - } else if symbol == ">" { - cram-filled-right - } else if symbol == 
"<" { - cram-filled-left - } else if symbol == ":>" { - cram-dashed-right - } else if symbol == "<:" { - cram-dashed-left - } else if symbol == "|>" { - cram-hollow-right - } else if symbol == "<|" { - cram-hollow-left - } else { - single - } - - if absolute != none and relative != none { - bond-fn(relative: relative, absolute: absolute, name: name, ..options) - } else if relative != none { - bond-fn(relative: relative, name: name, ..options) - } else if absolute != none { - bond-fn(absolute: absolute, name: name, ..options) - } else { - bond-fn(name: name, ..options) - } +#let transform_bond(ctx, bond) = { + let (ctx, angle) = process_bond(ctx, bond) + + (ctx, generate_bond(bond, angle)) } -#let transform_branch(branch, transform_molecule) = { - let elements = () - elements += transform_bond(branch.bond) - elements += transform_molecule(branch.body) - - return ( - type: "branch", - body: elements, - args: (:), - ) +#let transform_branch(ctx, branch, transform_molecule_fn) = { + let (ctx, bond) = transform_bond(ctx, branch.bond) + let body = transform_molecule_fn(ctx, branch.body) + + generate_branch(bond, body) } -#let transform_cycle(cycle, transform_molecule) = { - let body = transform_molecule(cycle.body) - if body.len() > 0 and body.at(0).type == "fragment" { - body = body.slice(1) +#let transform_cycle(ctx, cycle, transform_molecule_fn) = { + let body = if cycle.body == none { + (single(), single()) + } else { + transform_molecule_fn( + ctx + ( + parent_type: "cycle", + position: ctx.position + (cycle.faces, 0), + ), + cycle.body + ) } - return ( - type: "cycle", - faces: cycle.faces, - body: body, - args: (:), - ) -} + // (hetero, body) = if body.at(0).type == "fragment" { + // (body.at(0), body.slice(1)) + // } + // (hetero, body) = if body.at(n-1).type == "fragment" { + // (body.at(n-1), body.slice(1)) + // } + // 0, n fragment の付け替え -#let transform_label_reference(label) = { - ( - type: "label-reference", - label: label.label, - links: (:), - ) + 
generate_cycle(cycle, body) } -#let transform_unit(unit, transform_molecule) = { - let elements = () +#let transform_unit(ctx, unit, transform_molecule_fn) = { + if unit == none { return none } - if unit.node != none { - // Debug: log what we're getting - if type(unit.node) == str { - // This shouldn't happen, but if node is a raw string, treat it as a label reference - elements.push(transform_label_reference((type: "label", label: unit.node))) - elements += unit.at("branches", default: ()).map(branch => transform_branch(branch, transform_molecule)) - return elements - } - - // Check if node has a type field (it should always have one from the parser) - let node_type = if type(unit.node) == dictionary { - unit.node.at("type", default: "unknown") - } else { - "unknown" - } - - if node_type == "fragment" { - elements.push(transform_fragment(unit.node)) - } else if node_type == "cycle" { - elements.push(transform_cycle(unit.node, transform_molecule)) - } else if node_type == "label-ref" { - elements.push(transform_label_reference(unit.node)) - } else if node_type == "implicit" { + // Process the node + let node = unit.node + let generated = if node != none { + if node.type == "fragment" { + transform_fragment(ctx, node) + } else if node.type == "cycle" { + transform_cycle(ctx, node, transform_molecule_fn) + } else if node.type == "label-ref" { + generate_label_reference(node) + } else if node.type == "implicit" { // Implicit node, no action needed + none } else { - panic("Unknown node type: " + node_type + " for node: " + repr(unit.node)) + panic("Unknown node type: " + node.type + " for node: " + repr(node)) } + } else { + none } - elements += unit.branches.map(branch => transform_branch(branch, transform_molecule)) - - return elements -} - -#let transform_molecule(molecule) = { - if molecule == none { return () } - if type(molecule) == array { return molecule } - if molecule.type != "molecule" { return () } + // Process branches with proper context + let branches = 
unit.branches.enumerate().map(((idx, branch)) => { + transform_branch( + ctx + ( + parent_type: "branch", + position: ctx.position + ((unit.branches.len(), idx),) + ), + branch, + transform_molecule_fn + ) + }) - let elements = () - elements += transform_unit(molecule.first, transform_molecule) - for item in molecule.rest { - elements += transform_bond(item.bond) - elements += transform_unit(item.unit, transform_molecule) + if generated != none { + (generated, ..branches) + } else { + branches } - return elements } -// Resolve label references after transformation -#let resolve_label_references(elements) = { - // First pass: collect all labeled atoms and their positions - let label_positions = (:) - let position = 0 - - for (i, element) in elements.enumerate() { - if element.type == "fragment" and element.at("label", default: none) != none { - label_positions.insert(element.label, i) - } - } - - // Second pass: resolve label references - let resolved = elements - for (i, element) in elements.enumerate() { - if element.type == "label-reference" { - let label = element.label - if label in label_positions { - let target_pos = label_positions.at(label) - resolved.at(i) = ( - type: "link", - from: i, - to: target_pos, - bond: single(), // Default to single bond, could be customized - ) - } else { - // Label not found, keep as unresolved reference or error - resolved.at(i) = ( - type: "error", - message: "Unresolved label reference: " + label, - ) - } +#let transform_molecule(ctx, molecule) = { + if molecule == none or molecule.type != "molecule" { return () } + + let chain_length = molecule.rest.len() + ctx += ( + current_angle: (IUPAC_ANGLES.main_chain_initial)(chain_length), + prev_bond: none, + next_bond: if chain_length > 0 { molecule.rest.at(0).bond } else { none }, + position: ctx.position + ((chain_length, 0),) + ) + + // Transform first unit + let first = transform_unit( + ctx, + molecule.first, + transform_molecule + ) + + // Transform rest of chain + let 
processed_rest = if molecule.rest != none and molecule.rest.len() > 0 { + for (idx, item) in molecule.rest.enumerate() { + let rest_ctx = ctx + ( + prev_bond: ctx.next_bond, + next_bond: if chain_length < molecule.rest.len() { molecule.rest.at(idx + 1).bond } else { none }, + position: ctx.position + ((chain_length, idx + 1),), + ) + + let (rest_ctx, bond) = transform_bond(rest_ctx, item.bond) + let unit = transform_unit(rest_ctx, item.unit, transform_molecule) + ctx = rest_ctx + + (bond, unit) } + } else { + () } - - return resolved + + return first + processed_rest.join() +} + +// ============================ Reaction ============================ + +#let transform_term(ctx, molecule) = { + transform_molecule(ctx + (parent_type: none), molecule) +} + +#let transform_operator(ctx, operator) = { + generate_operator(operator) } -#let transform_reaction(reaction) = { +#let transform_reaction(ctx, reaction) = { reaction.terms.map(term => { if term.type == "term" { - let molecule = term.molecule - let molecule_with_angles = calculate_angles(molecule) - - let transformed = transform_molecule(molecule_with_angles) - // Resolve any label references in the transformed molecule - resolve_label_references(transformed) + transform_term(ctx, term.molecule) } else if term.type == "operator" { - let op = if term.op == "->" { - sym.arrow.r - } else if term.op == "<->" { - sym.arrow.l.r - } else if term.op == "<=>" { - sym.harpoons.ltrb - } else { - eval("$" + term.op + "$") - } - op = math.attach( - math.stretch(op, size: 100% + 2em), - t: [#term.condition-before], b: [#term.condition-after] - ) - - (( - type: "operator", - name: none, - op: op, - margin: 0.7em, - ),) + (transform_operator(ctx, term),) } else { panic("Unknown term type: " + term.type) } }).join() } -#let transform = transform_reaction +#let transform(reaction) = { + let ctx = init_state() + + transform_reaction(ctx, reaction) +} diff --git a/tests/molecule-integration/test.typ b/tests/molecule-integration/test.typ 
index ebb26a7..1ae9ed3 100644 --- a/tests/molecule-integration/test.typ +++ b/tests/molecule-integration/test.typ @@ -113,18 +113,12 @@ == Complex Natural Compounds (Simplified) === Caffeine Skeleton (Simplified) -#skeletize(molecule("@6(=O)(-N(-CH3))-N-@5(-N(-CH3))=N")) - -=== Cholesterol Skeleton (Simplified) -// #skeletize(molecule("@6-@6-@6-@5(-CH3)(-CH(-CH3)-CH2-CH2-CH2-CH(-CH3)2)")) +#skeletize(molecule("@6((=O)-N(-)-(=O)-@5(-N(-)-=N-=)--N(-)-)")) == Pharmaceutical Skeleton (Simplified) === Aspirin -#skeletize(molecule("@6(-=-=(-O-C-CH3)-=)")) +#skeletize(molecule("@6(-=-(-O-(=O)-CH3)=(-(=O)-OH)-=)")) === Paracetamol -#skeletize(molecule("@6(-OH)---(-NH-C(=O)-CH3)")) - -=== Ibuprofen Skeleton -// #skeletize(molecule("@6(-CH(-CH3)-CH2-CH(-CH3)2)---(-CH(-CH3)-C(=O)-OH)")) +#skeletize(molecule("@6((-OH)-=-(-NH-(=O)-CH3)=-=)")) From f5a9ebb1561ff122efc7182e39638a06ad18038d Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Mon, 22 Sep 2025 09:51:44 +0900 Subject: [PATCH 23/30] feat: initial connecting points --- src/elements/molecule/generator.typ | 76 +++++++++------------------ src/elements/molecule/iupac-angle.typ | 30 +++++++---- src/elements/molecule/transformer.typ | 69 ++++++++++++++---------- 3 files changed, 85 insertions(+), 90 deletions(-) diff --git a/src/elements/molecule/generator.typ b/src/elements/molecule/generator.typ index 239d308..6de7113 100644 --- a/src/elements/molecule/generator.typ +++ b/src/elements/molecule/generator.typ @@ -3,24 +3,26 @@ // ============================ Molecule ============================ #let generate_fragment(node) = ( - type: "fragment", - atoms: node.atoms, - name: node.at("name", default: none), - links: node.at("links", default: (:)), - lewis: node.options.at("lewis", default: ()), - vertical: node.options.at("vertical", default: false), - count: node.atoms.len(), - colors: node.options.at("colors", default: none), - label: node.at("name", default: none), - ..node.options, 
+ ( + type: "fragment", + atoms: node.atoms, + name: node.at("name", default: none), + links: node.at("links", default: (:)), + lewis: node.options.at("lewis", default: ()), + vertical: node.options.at("vertical", default: false), + count: node.atoms.len(), + colors: node.options.at("colors", default: none), + label: node.at("name", default: none), + ..node.options, + ), ) -#let generate_bond(bond, angle) = { +#let generate_bond(bond, angle, options) = { let symbol = bond.symbol let name = bond.at("name", default: none) let absolute = if angle != none { angle } else { bond.at("absolute", default: none) } let relative = bond.at("relative", default: none) - let options = bond.options + let options = if options != (:) { options } else { bond.options } let bond-fn = if symbol == "-" { single @@ -55,25 +57,20 @@ } } -#let generate_branch(bond, body) = { - return ( +#let generate_branch(bond, body) = ( + ( type: "branch", - body: (..bond, ..body), + body: {bond; body}, args: (:), - ) -} - -#let generate_cycle(cycle, body) = { - return ( - type: "cycle", - faces: cycle.faces, - body: body, - args: (:), - ) -} + ), +) -#let generate_label_reference(label) = { -} +#let generate_cycle(cycle, body) = ( + type: "cycle", + faces: cycle.faces, + body: body, + args: (:), +) #let generate_molecule(molecule) = { if molecule == none { return () } @@ -91,15 +88,6 @@ // ============================ Reaction ============================ -#let generate_term(ctx, molecule) = { - if molecule.type != "molecule" { - return molecule - } - - let transformed = generate_molecule(molecule_with_angles) - return generate_label_references(transformed) -} - #let generate_operator(operator) = { let op = if operator.op == "->" { sym.arrow.r @@ -123,17 +111,3 @@ margin: 0.7em, ) } - -#let generate_reaction(reaction) = { - reaction.terms.map(term => { - if term.type == "term" { - generate_term(term.molecule) - } else if term.type == "operator" { - (generate_operator(term),) - } else { - panic("Unknown 
term type: " + term.type) - } - }).join() -} - -#let generate = generate_reaction diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index 1e8f0ba..c14059d 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -5,6 +5,7 @@ main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg } - 60deg, zigzag: idx => if calc.rem(idx, 2) == 1 { 60deg } else { -60deg }, incoming: -180deg, + straight: 0deg, sp3: (60deg, -60deg, -120deg, -180deg), sp2: (60deg, -60deg, -180deg), @@ -25,21 +26,18 @@ if n == 2 and (triple >= 1 or double >= 2) { IUPAC_ANGLES.sp } else if branches_len <= 1 and (double >= 1 or other >= 2) { IUPAC_ANGLES.sp2 } else if branches_len <= 2 { IUPAC_ANGLES.sp3 } - else { (IUPAC_ANGLES.branch_angles)(n) } + else { range(n).map(i => (IUPAC_ANGLES.branch_angles)(n, i)) } } -#let process_bond(ctx, bond) = { +#let bond-angle(ctx, bond) = { let (n, idx) = ctx.position.last() let angle = if ctx.parent_type == "unit" or ctx.parent_type == none { - // if n == 2 { - // panic(ctx, bond) - // } ctx.current_angle + (IUPAC_ANGLES.zigzag)(idx) } else if ctx.parent_type == "cycle" { ctx.current_angle + (IUPAC_ANGLES.cycle_edge_angles)(n, idx) } else if ctx.parent_type == "branch" { - ctx.current_angle + (IUPAC_ANGLES.branch_angles)(n, idx) + ctx.current_angle } else { panic("Unknown parent type: " + ctx.parent_type) } @@ -47,10 +45,13 @@ return (ctx + (current_angle: angle), angle) } -#let process_branch(ctx, unit) = { +#let unit-angles(ctx, unit) = { let (n, idx) = ctx.position.last() + if ctx.parent_type == "cycle" { + return range(n).map(i => ctx.current_angle + (IUPAC_ANGLES.cycle_branch_angles)(n, i)) + } - let branches = unit.at("branches", default: ()) + let branches = unit.branches if branches.len() == 0 { return () } let bonds = branches.map(b => b.bond) @@ -58,9 +59,8 @@ if ctx.next_bond != none { bonds.push(ctx.next_bond) } let angles = 
hybridization_angles(bonds, branches.len()).filter( - angle => (ctx.prev_bond == none or ctx.next_bond == none - or angle != IUPAC_ANGLES.incoming - or angle != IUPAC_ANGLES.zigzag(index)) + angle => (ctx.prev_bond == none or angle != IUPAC_ANGLES.incoming) + and (ctx.next_bond == none or angle != (IUPAC_ANGLES.zigzag)(idx + 1)) ) // first branches of the main chain @@ -70,3 +70,11 @@ return angles } + +#let initial-angle(ctx, molecule) = { + if molecule.first.node.type == "cycle" { + return (IUPAC_ANGLES.cycle_branch_angles)(molecule.first.node.faces, -1) + } + + return (IUPAC_ANGLES.main_chain_initial)(molecule.rest.len()) +} diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 9ecb47b..c604c85 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,8 +1,7 @@ -#import "iupac-angle.typ": process_bond, IUPAC_ANGLES +#import "iupac-angle.typ": bond-angle, IUPAC_ANGLES, unit-angles, initial-angle #import "generator.typ": * #let init_state() = ( - // Position and angle information position: (), // Position in the molecule parent_type: none, // Parent structure type prev_bond: none, // Previous bond information @@ -19,21 +18,25 @@ } #let transform_bond(ctx, bond) = { - let (ctx, angle) = process_bond(ctx, bond) + let (ctx, angle) = bond-angle(ctx, bond) - (ctx, generate_bond(bond, angle)) + // connecting points + if ctx.parent_type == "cycle" { + return (ctx, generate_bond(bond, angle, (from: 0, to: 0))) + } + + (ctx, generate_bond(bond, angle, (:))) } #let transform_branch(ctx, branch, transform_molecule_fn) = { let (ctx, bond) = transform_bond(ctx, branch.bond) - let body = transform_molecule_fn(ctx, branch.body) - + let body = transform_molecule_fn(ctx + (parent_type: "unit", ), branch.body) generate_branch(bond, body) } #let transform_cycle(ctx, cycle, transform_molecule_fn) = { let body = if cycle.body == none { - (single(), single()) + range(cycle.faces).map(i => 
single()).join() } else { transform_molecule_fn( ctx + ( @@ -44,15 +47,23 @@ ) } - // (hetero, body) = if body.at(0).type == "fragment" { - // (body.at(0), body.slice(1)) - // } - // (hetero, body) = if body.at(n-1).type == "fragment" { - // (body.at(n-1), body.slice(1)) - // } - // 0, n fragment の付け替え + let hetero = none + (hetero, body) = if body.at(0).type == "fragment" { + (body.at(0), body.slice(1)) + } else { + (none, body) + } + (hetero, body) = if body.last().type == "fragment" { + (body.last(), body.slice(0, -1)) + } else { + (none, body) + } - generate_cycle(cycle, body) + if hetero != none { + (hetero, generate_cycle(cycle, body)) + } else { + (generate_cycle(cycle, body),) + } } #let transform_unit(ctx, unit, transform_molecule_fn) = { @@ -77,12 +88,14 @@ none } - // Process branches with proper context - let branches = unit.branches.enumerate().map(((idx, branch)) => { + // Process branches + let angles = unit-angles(ctx, unit) + let branches = unit.branches.enumerate().zip(angles).map((((idx, branch), angle)) => { transform_branch( ctx + ( parent_type: "branch", - position: ctx.position + ((unit.branches.len(), idx),) + position: ctx.position + ((unit.branches.len(), idx),), + current_angle: ctx.current_angle + angle, ), branch, transform_molecule_fn @@ -90,9 +103,9 @@ }) if generated != none { - (generated, ..branches) + (..generated, ..branches.join()) } else { - branches + branches.join() } } @@ -101,9 +114,9 @@ let chain_length = molecule.rest.len() ctx += ( - current_angle: (IUPAC_ANGLES.main_chain_initial)(chain_length), + current_angle: initial-angle(ctx, molecule), prev_bond: none, - next_bond: if chain_length > 0 { molecule.rest.at(0).bond } else { none }, + next_bond: if 0 < chain_length { molecule.rest.at(0).bond } else { none }, position: ctx.position + ((chain_length, 0),) ) @@ -115,11 +128,11 @@ ) // Transform rest of chain - let processed_rest = if molecule.rest != none and molecule.rest.len() > 0 { + let rest = if molecule.rest != none 
and chain_length > 0 { for (idx, item) in molecule.rest.enumerate() { let rest_ctx = ctx + ( prev_bond: ctx.next_bond, - next_bond: if chain_length < molecule.rest.len() { molecule.rest.at(idx + 1).bond } else { none }, + next_bond: if idx + 1 < chain_length { molecule.rest.at(idx + 1).bond } else { none }, position: ctx.position + ((chain_length, idx + 1),), ) @@ -127,13 +140,13 @@ let unit = transform_unit(rest_ctx, item.unit, transform_molecule) ctx = rest_ctx - (bond, unit) + (..bond, ..unit) } } else { () } - return first + processed_rest.join() + return (..first, ..rest) } // ============================ Reaction ============================ @@ -155,11 +168,11 @@ } else { panic("Unknown term type: " + term.type) } - }).join() + }) } #let transform(reaction) = { let ctx = init_state() - transform_reaction(ctx, reaction) + transform_reaction(ctx, reaction).join() } From bcc26e58a6d218b4bccf2b2559870a2859dc839e Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 24 Sep 2025 09:05:55 +0900 Subject: [PATCH 24/30] feat: make rings treated as branch --- src/elements/molecule/parser.typ | 24 +++++++---------------- src/elements/molecule/transformer.typ | 28 ++++++++++++++++----------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index a368eb7..9df9c08 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -12,8 +12,8 @@ // molecule syntax molecule ::= unit (bond unit)* - unit ::= (node | implicit_node) branch* - node ::= fragment | ring | label + unit ::= (node | implicit_node) branch* ring* + node ::= fragment | label implicit_node ::= ε fragment ::= FRAGMENT label? options? 
@@ -360,7 +360,6 @@ #let node-parser(mol-parser) = choice( fragment-parser, - ring-parser(mol-parser), label-ref-parser ) @@ -373,9 +372,9 @@ ) #let unit-parser(mol-parser) = seq( - optional(node-parser(mol-parser)), many(branch-parser(mol-parser)), + optional(node-parser(mol-parser)), many(branch-parser(mol-parser)), many(ring-parser(mol-parser)), map: parts => { - let (node, branches) = parts + let (node, branches, rings) = parts // Handle label reference as a special unit type if node != none and node.type == "label-ref" { @@ -383,13 +382,14 @@ type: "unit", node: node, branches: branches, - is_continuation_start: true + rings: rings, ) } else { ( type: "unit", node: if node == none { (type: "implicit") } else { node }, - branches: branches + branches: branches, + rings: rings, ) } } @@ -404,18 +404,8 @@ map: nodes => { let (first, rest) = nodes - // Check if molecule starts with a label reference - let is_continuation = first.at("is_continuation_start", default: false) - let continuation_label = if is_continuation and first.node.type == "label-ref" { - first.node.label - } else { - none - } - ( type: "molecule", - is_continuation: is_continuation, - continuation_label: continuation_label, first: first, rest: rest.map(unit => { let (bond, unit) = unit diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index c604c85..cace609 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -5,7 +5,7 @@ position: (), // Position in the molecule parent_type: none, // Parent structure type prev_bond: none, // Previous bond information - next_bond: none, // Next bond information (for lookahead) + next_bond: none, // Next bond information current_angle: 0deg, // Current absolute angle visited_labels: (), // Visited labels (prevent circular references) label_table: (:), // Label table for references @@ -74,18 +74,16 @@ let generated = if node != none { if node.type == "fragment" { 
transform_fragment(ctx, node) - } else if node.type == "cycle" { - transform_cycle(ctx, node, transform_molecule_fn) } else if node.type == "label-ref" { generate_label_reference(node) } else if node.type == "implicit" { // Implicit node, no action needed - none + () } else { panic("Unknown node type: " + node.type + " for node: " + repr(node)) } } else { - none + () } // Process branches @@ -101,12 +99,20 @@ transform_molecule_fn ) }) - - if generated != none { - (..generated, ..branches.join()) - } else { - branches.join() - } + + // Process rings + let rings = unit.rings.enumerate().map(((idx, ring)) => { + transform_cycle( + ctx + ( + parent_type: "cycle", + position: ctx.position + ((unit.rings.len(), idx),), + ), + ring, + transform_molecule_fn + ) + }) + + (..generated, ..branches.join(), ..rings.join()) } #let transform_molecule(ctx, molecule) = { From 91aa2e9f423f2f0e0367a5af3c19368ba93df9bc Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:11:55 +0900 Subject: [PATCH 25/30] fix: fix some bugs --- src/elements/molecule/iupac-angle.typ | 20 +++++++-------- src/elements/molecule/transformer.typ | 36 ++++++++++++--------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index c14059d..4392a46 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -12,8 +12,8 @@ sp: (0deg, -180deg), branch_angles: (n, idx) => 180deg - (idx + 1) * 360deg / n, - cycle_edge_angles: (n, idx) => -90deg + (idx + 1) * 360deg / n, - cycle_branch_angles: (n, idx) => -180deg + (idx + 1/2) * 360deg / n, + cycle_edge_angles: n => 360deg / n, + cycle_branch_angles: n => -150deg + 180deg / n, ) // Calculate the angles for the hybridization of the bonds @@ -35,7 +35,8 @@ let angle = if ctx.parent_type == "unit" or ctx.parent_type == none { ctx.current_angle + (IUPAC_ANGLES.zigzag)(idx) } else 
if ctx.parent_type == "cycle" { - ctx.current_angle + (IUPAC_ANGLES.cycle_edge_angles)(n, idx) + let (faces, _) = ctx.position.at(-2) + ctx.current_angle + (IUPAC_ANGLES.cycle_edge_angles)(faces) } else if ctx.parent_type == "branch" { ctx.current_angle } else { @@ -45,13 +46,14 @@ return (ctx + (current_angle: angle), angle) } -#let unit-angles(ctx, unit) = { +#let branch-angles(ctx, branches) = { let (n, idx) = ctx.position.last() + if ctx.parent_type == "cycle" { - return range(n).map(i => ctx.current_angle + (IUPAC_ANGLES.cycle_branch_angles)(n, i)) + let (faces, _) = ctx.position.at(-2) + return ((IUPAC_ANGLES.cycle_branch_angles)(faces),) } - let branches = unit.branches if branches.len() == 0 { return () } let bonds = branches.map(b => b.bond) @@ -67,14 +69,10 @@ if ctx.prev_bond == none and ctx.parent_type == none { angles = angles.map(angle => angle + 180deg) } - + return angles } #let initial-angle(ctx, molecule) = { - if molecule.first.node.type == "cycle" { - return (IUPAC_ANGLES.cycle_branch_angles)(molecule.first.node.faces, -1) - } - return (IUPAC_ANGLES.main_chain_initial)(molecule.rest.len()) } diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index cace609..87b415e 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,4 +1,4 @@ -#import "iupac-angle.typ": bond-angle, IUPAC_ANGLES, unit-angles, initial-angle +#import "iupac-angle.typ": bond-angle, branch-angles, initial-angle #import "generator.typ": * #let init_state() = ( @@ -41,29 +41,23 @@ transform_molecule_fn( ctx + ( parent_type: "cycle", - position: ctx.position + (cycle.faces, 0), + position: ctx.position + ((cycle.faces, 0),), ), cycle.body ) } - let hetero = none - (hetero, body) = if body.at(0).type == "fragment" { - (body.at(0), body.slice(1)) - } else { - (none, body) + let hetero = () + if body.at(0).type == "fragment" { + hetero.push(body.at(0)) + body = body.slice(1) } - (hetero, body) = if 
body.last().type == "fragment" { - (body.last(), body.slice(0, -1)) - } else { - (none, body) + if body.last().type == "fragment" { + hetero.push(body.last()) + body = body.slice(0, -1) } - if hetero != none { - (hetero, generate_cycle(cycle, body)) - } else { - (generate_cycle(cycle, body),) - } + (..hetero, generate_cycle(cycle, body)) } #let transform_unit(ctx, unit, transform_molecule_fn) = { @@ -87,7 +81,7 @@ } // Process branches - let angles = unit-angles(ctx, unit) + let angles = branch-angles(ctx, unit.branches) let branches = unit.branches.enumerate().zip(angles).map((((idx, branch), angle)) => { transform_branch( ctx + ( @@ -106,6 +100,7 @@ ctx + ( parent_type: "cycle", position: ctx.position + ((unit.rings.len(), idx),), + current_angle: ctx.current_angle, ), ring, transform_molecule_fn @@ -119,11 +114,12 @@ if molecule == none or molecule.type != "molecule" { return () } let chain_length = molecule.rest.len() + let position = ctx.position ctx += ( current_angle: initial-angle(ctx, molecule), prev_bond: none, next_bond: if 0 < chain_length { molecule.rest.at(0).bond } else { none }, - position: ctx.position + ((chain_length, 0),) + position: position + ((chain_length, 0),) ) // Transform first unit @@ -139,7 +135,7 @@ let rest_ctx = ctx + ( prev_bond: ctx.next_bond, next_bond: if idx + 1 < chain_length { molecule.rest.at(idx + 1).bond } else { none }, - position: ctx.position + ((chain_length, idx + 1),), + position: position + ((chain_length, idx + 1),), ) let (rest_ctx, bond) = transform_bond(rest_ctx, item.bond) @@ -152,7 +148,7 @@ () } - return (..first, ..rest) + (..first, ..rest) } // ============================ Reaction ============================ From 1a1848c958e1b7c0edb3679038e5e02f85a4be8a Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:12:36 +0900 Subject: [PATCH 26/30] refactor: edge test cases --- tests/molecule-edge-cases/test.typ | 142 +++++++++-------------------- 1 file 
changed, 44 insertions(+), 98 deletions(-) diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ index e9dbfef..0c3848b 100644 --- a/tests/molecule-edge-cases/test.typ +++ b/tests/molecule-edge-cases/test.typ @@ -11,7 +11,7 @@ if not parsed.success { return [ == #description - text(fill: red)[ + #text(fill: red)[ Failed to parse "#input": #parsed.error ] ] @@ -23,10 +23,10 @@ [ == #description ✓ Input: #input - // #skeletize(result) + #skeletize(result) #linebreak() Parsed successfully with #parsed.value.terms.len() nodes - // #repr(parsed.value) + // #repr(reaction) #linebreak() #repr(result) // #linebreak() @@ -35,7 +35,7 @@ = Parser edge cases // Empty input -#test-parse("", "Empty input") +// #test-parse("", "Empty input") // Whitespace only #test-parse(" ", "Whitespace only") @@ -44,11 +44,18 @@ #test-parse("C", "Single atom") #test-parse("H", "Single hydrogen") #test-parse("Cl", "Single chlorine") +#test-parse("C123H456", "Very large numbers") // Bond only (no atom) #test-parse("-", "Bond only") #test-parse("=", "Double bond only") #test-parse("#", "Triple bond only") +#test-parse(">", "Arrow bond only") +#test-parse("<", "Arrow bond only") +#test-parse(":>", "Arrow bond only") +#test-parse("<:", "Arrow bond only") +#test-parse("|>", "Arrow bond only") +#test-parse("<|", "Arrow bond only") // Incomplete bond #test-parse("CH3-", "Trailing bond") @@ -63,6 +70,8 @@ #test-parse("CH3-OH)", "Extra closing in chain") // Deeply nested structure +#test-parse("-(-OH)", "Deeply nested (1 levels)") +#test-parse("-(-(-OH))", "Deeply nested (2 levels)") #test-parse("-(-(-(-OH)))", "Deeply nested (3 levels)") #test-parse("-(-(-(-(-OH))))", "Deeply nested (4 levels)") #test-parse("-(-(-(-(-(-OH)))))", "Deeply nested (5 levels)") @@ -82,6 +91,9 @@ // #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH)))))))))))))))))))", "Deeply nested (19 levels)") // #test-parse("-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-(-OH))))))))))))))))))))", "Deeply 
nested (20 levels)") +#let many-branches = "C(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)" +#test-parse(many-branches, "10 branches on single carbon") + // Complex branching patterns #test-parse("C()()()()", "Empty branches") #test-parse("C(-CH3)()(-OH)", "Mixed empty and filled branches") @@ -95,99 +107,64 @@ = Ring structure edge cases -// Basic rings -#test-parse("@6", "Simple 6-membered ring") -#test-parse("@5", "5-membered ring") -#test-parse("@4", "4-membered ring") +#test-parse("@1", "1-membered ring (invalid)") +#test-parse("@2", "2-membered ring (invalid)") #test-parse("@3", "3-membered ring") +#test-parse("@4", "4-membered ring") +#test-parse("@5", "5-membered ring") +#test-parse("@6", "6-membered ring") #test-parse("@7", "7-membered ring") #test-parse("@8", "8-membered ring") - -// Ring size boundary values -#test-parse("@2", "2-membered ring (chemically impossible)") -#test-parse("@1", "1-membered ring (invalid)") #test-parse("@10", "10-membered ring") -#test-parse("@15", "15-membered ring (macrocycle)") -#test-parse("@20", "20-membered ring (large macrocycle)") +#test-parse("@15", "15-membered ring") +#test-parse("@20", "20-membered ring") // Ring bond patterns #test-parse("@6(------)", "Ring with explicit single bonds") #test-parse("@6(=-=-=-)", "Benzene with alternating bonds") -#test-parse("@6(======)", "Ring with all double bonds (impossible)") -#test-parse("@6(#-----)", "Ring with triple bond (strained)") +#test-parse("@6(-AB-AB-AB-AB-AB-AB)", "connecting points") +#test-parse("@6((-)-(-)-(-)-(-)-(-)-(-)-)", "branches in cycle") +#test-parse("@5((-)-(-)-(-)-(-)-(-)-)", "branches in cycle") +#test-parse("@6((-)(-)-(-)(-)-(-)(-)-(-)(-)-(-)(-)-(-)(-)-)", "2 branches in each edge in cycle") +#test-parse("@6((-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-)", "branches in branches in cycle") +#test-parse("@6((-)-(-)-(-)-(-)-(-)-)", "branches in cycle") // Ring substituents -#test-parse("@6(------CH3)", "Ring with one substituent") 
-#test-parse("@6(------CH3)(-OH)", "Ring with adjacent substituents") -#test-parse("@6(------CH3)-OH", "Ring with separated substituents") -#test-parse("---(-OH)", "Ring with 1,3-substituents") -#test-parse("@6(------CH3)--(-OH)", "Ring with 1,3-substituents") -#test-parse("@6(------CH3)---(-OH)", "Ring with 1,4-substituents") - -// Ring with complex substituents -#test-parse("@6(--CH2-CH3---)", "Ring with ethyl group") -#test-parse("@6(------CH(-CH3)2)", "Ring with isopropyl group") -#test-parse("@6(-----(-C(=O)-OH)-)", "Ring with carboxyl group") -#test-parse("@6(----CH2-CH2-CH3)(-OH)", "Ring with propyl and hydroxyl") - -// Ring with nested branches -#test-parse("@6(-CH2(-OH))", "Ring with branched substituent") -#test-parse("@6(-CH(-CH3)(-OH))", "Ring with multi-branched substituent") - -// Ring connected to chain #test-parse("CH3-@6", "Methyl attached to ring") #test-parse("@6-CH3", "Ring attached to methyl") +#test-parse("@6(-CH3)", "Ring with adjacent substituents") #test-parse("CH3-@6-CH3", "Ring in middle of chain") #test-parse("CH3-CH2-@6-CH2-CH3", "Ring embedded in chain") - -// Multiple rings -#test-parse("@6-@6", "Two connected rings (biphenyl)") -#test-parse("@6-A", "Two connected rings (biphenyl)") -#test-parse("@6-CH2-@6", "Rings connected by methylene") -#test-parse("@6=@6", "Rings connected by double bond") #test-parse("@6-@5", "Different sized rings connected") +#test-parse("@6-CH2-@6", "Rings connected by methylene") // Invalid ring notation (expected parse error) -// #test-parse("@", "Asterisk without size") -// #test-parse("@0", "Zero-sized ring") -// #test-parse("@-1", "Negative ring size") -// #test-parse("@a", "Non-numeric ring size") -// #test-parse("@6.5", "Decimal ring size") +#test-parse("@", "Asterisk without size") +#test-parse("@0", "Zero-sized ring") +#test-parse("@-1", "Negative ring size") +#test-parse("@a", "Non-numeric ring size") +#test-parse("@6.5", "Decimal ring size") // Ring with empty parentheses -#test-parse("@6()", "Ring 
with empty parentheses") +// #test-parse("@6()", "Ring with empty parentheses") #test-parse("@6(())", "Ring with nested empty parentheses") -#test-parse("@6(CH3)", "Ring with atom in parentheses (invalid)") +// #test-parse("@6(CH3)", "Ring with atom in parentheses (invalid)") #test-parse("@6(-)", "Ring with only bond") -#test-parse("@6((-))", "Ring with parenthesized bond") +// #test-parse("@6((-))", "Ring with parenthesized bond") #test-parse("@6(-=-=-(-O-CH3)=)", "Ring with carboxyl group") // Label special cases -#test-parse("CH3:", "Label without name") -#test-parse("CH3::", "Double colon") -#test-parse("CH3:label1:label2", "Multiple labels") -#test-parse(":labelonly", "Label without atom") - -// Consecutive bonds -#test-parse("CH3=-CH3", "Mixed bond types") -// Multiple different bonds are grammar errors so omitted -#test-parse("CH3<>CH3", "Consecutive wedge bonds") +// #test-parse("CH3:", "Label without name") +// #test-parse("CH3::", "Double colon") +// #test-parse("CH3:label1:label2", "Multiple labels") +// #test-parse(":labelonly", "Label without atom") // Number processing -#test-parse("C2H6", "Molecular formula style") -#test-parse("CH23", "Large subscript") -#test-parse("C123H456", "Very large numbers") = Conversion edge cases - - // Circular reference possibility // Ring structure nested test is omitted -// Very many branches -#let many-branches = "C(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)(-CH3)" -#test-parse(many-branches, "10 branches on single carbon") - // Interchangeable bond patterns // Complex bond patterns are omitted @@ -196,28 +173,8 @@ = Actually chemically invalid structures -== Chemically impossible but syntactically valid - -// Pentavalent carbon -#test-parse("C(-H)(-H)(-H)(-H)(-H)", "Pentavalent carbon") - -// 2-membered ring -// 2-membered ring test is omitted - -// Triple nested -#test-parse("CH3(-CH2(-CH(-CH2(-OH))))", "Quadruple nested") - -= Boundary value test - -== Minimum case -#test-parse("H", "Single hydrogen") 
-#test-parse("C", "Single carbon") - -== Maximum case -// Very long atom name += Maximum case #test-parse("CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2OH", "Long atom string") - -// Very long label #test-parse("CH3:verylonglabelnamethatshouldstillwork", "Long label name") = Unicode and special characters @@ -226,14 +183,3 @@ // #test-parse("CH³⁺", "Unicode superscript charge") // #test-parse("CH3–CH2–OH", "En dash bonds") // #test-parse("CH3−CH2−OH", "Minus sign bonds") - -= Performance test input - -// Huge linear structure -// 100 CH2 is too long so omitted - -// Huge branching structure -// 20 branches are omitted - -// Deep nested -// 10 levels of nesting are omitted From a69a01371cfca80e40318fc398a032c7aecf42ec Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:06:34 +0900 Subject: [PATCH 27/30] feat: implement atom processing and angle calculations for molecule transformations --- src/elements/molecule/generator.typ | 50 +++++ src/elements/molecule/iupac-angle.typ | 79 ++++++- src/elements/molecule/molecule.typ | 29 +++ src/elements/molecule/parser.typ | 44 +--- src/elements/molecule/transformer.typ | 295 +++++++++++++++++++------- 5 files changed, 386 insertions(+), 111 deletions(-) diff --git a/src/elements/molecule/generator.typ b/src/elements/molecule/generator.typ index 6de7113..fadcdbc 100644 --- a/src/elements/molecule/generator.typ +++ b/src/elements/molecule/generator.typ @@ -1,5 +1,55 @@ #import "../links.typ": single, double, triple, cram-filled-right, cram-filled-left, cram-dashed-right, cram-dashed-left, cram-hollow-right, cram-hollow-left +// ============================ Atom Processing ============================ + +/// Convert parsed atom structure to Typst math content +#let process-atom(parts) = { + let type = parts.type + + if type == "atoms" { + let base = parts.parts.map(process-atom) + if parts.charge != none { + (math.attach(base.join(), tr: eval("$" + parts.charge + "$")),) + } else { + 
base + } + } else if type == "abbreviation" { + text(parts.value) + } else if type == "math-text" { + eval(parts.value) + } else if type == "element-group" { + math.attach(parts.element, tl: [#parts.isotope], br: [#parts.subscript]) + } else if type == "parenthetical" { + let inner = process-atom(parts.atoms) + math.attach([(#inner.join())], br: [#parts.subscript]) + } else if type == "complex" { + let inner = process-atom(parts.atoms) + [\[#inner.join()\]] + } else { + "unknown type: " + type + } +} + +/// Extract element names from parsed content and find the first non-H index +/// Returns the index of the first non-H element (0 if all are H or empty) +#let calc-main-index(parts) = { + // Extract element names recursively + let extract(p) = { + if p.type == "atoms" { p.parts.map(extract).flatten() } + else if p.type == "element-group" { (p.element,) } + else if p.type == "parenthetical" or p.type == "complex" { extract(p.atoms) } + else if p.type == "abbreviation" or p.type == "math-text" { (p.value,) } + else { () } + } + let elements = extract(parts) + + // Find first non-H index + for (idx, el) in elements.enumerate() { + if el != "H" { return idx } + } + 0 +} + // ============================ Molecule ============================ #let generate_fragment(node) = ( diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index 4392a46..93807dd 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -1,5 +1,3 @@ -#import "@preview/cetz:0.2.2" - // relative angles #let IUPAC_ANGLES = ( main_chain_initial: chain_length => if chain_length >= 2 { 30deg } else { 0deg } - 60deg, @@ -46,15 +44,78 @@ return (ctx + (current_angle: angle), angle) } +// Calculate relative angle for a ring attached to a main chain unit +// Returns (angle, absolute) tuple, or (none, false) if default behavior should be used +#let ring-angle(ctx, ring, rings_count, idx) = { + if ctx.parent_type == "cycle" { + // Inside 
a cycle - use context info for polycyclic vs hetero detection + let outer_faces = ctx.at("outer_cycle_faces", default: none) + let outer_bonds = ctx.at("outer_cycle_body_len", default: none) + + // Also check inner ring's bonds vs faces + let inner_faces = ring.faces + let inner_bonds = if ring.body != none and ring.body.type == "molecule" and ring.body.rest != none { + ring.body.rest.len() + } else { 0 } + + // Polycyclic: outer or inner has fewer bonds than faces + let is_polycyclic = outer_bonds < outer_faces or inner_bonds < inner_faces + + if is_polycyclic { + (none, false) + } else { + // Hetero - use branch angle + ((IUPAC_ANGLES.cycle_branch_angles)(outer_faces), false) + } + } else if ctx.prev_bond != none and ctx.next_bond != none { + // MIDDLE of chain - ring goes as a branch + let base = 0deg + if rings_count > 1 { + base = base + 60deg * (idx - (rings_count - 1) / 2) + } + (base, false) + } else if ctx.prev_bond != none or ctx.next_bond != none { + // START or END of chain - ring extends parallel to chain direction + let (_, chain_idx) = ctx.position.last() + let edge = 180deg / ring.faces + let base = if ctx.prev_bond == none { + // START: extend opposite to chain direction + ctx.current_angle + 150deg + edge + } else { + // END: continue in chain direction + // Uses main_chain_initial pattern for consistency + let offset = 120deg + (IUPAC_ANGLES.main_chain_initial)(chain_idx) + (IUPAC_ANGLES.zigzag)(chain_idx) / 2 + ctx.current_angle - offset + edge + } + if rings_count > 1 { + base = base + 60deg * (idx - (rings_count - 1) / 2) + } + (base, false) + } else { + (none, false) + } +} + #let branch-angles(ctx, branches) = { let (n, idx) = ctx.position.last() + if branches.len() == 0 { return () } + if ctx.parent_type == "cycle" { let (faces, _) = ctx.position.at(-2) - return ((IUPAC_ANGLES.cycle_branch_angles)(faces),) - } + let base_angle = (IUPAC_ANGLES.cycle_branch_angles)(faces) - if branches.len() == 0 { return () } + let branch_count = 
branches.len() + if branch_count == 1 { + return (base_angle,) + } + + // For multiple branches, spread them symmetrically + let spread = 60deg + return range(branch_count).map(i => { + base_angle + spread * (i - (branch_count - 1) / 2) + }) + } let bonds = branches.map(b => b.bond) if ctx.prev_bond != none { bonds.push(ctx.prev_bond) } @@ -76,3 +137,11 @@ #let initial-angle(ctx, molecule) = { return (IUPAC_ANGLES.main_chain_initial)(molecule.rest.len()) } + +/// Check if angle is vertical (around 90deg or 270deg) +/// Used to determine when to connect to main atom instead of H +#let is-vertical-angle(angle) = { + let a = calc.rem(angle / 1deg, 360) * 1deg + if a < 0deg { a += 360deg } + (a > 60deg and a < 120deg) or (a > 240deg and a < 300deg) +} diff --git a/src/elements/molecule/molecule.typ b/src/elements/molecule/molecule.typ index 28c3980..2c29662 100644 --- a/src/elements/molecule/molecule.typ +++ b/src/elements/molecule/molecule.typ @@ -1,6 +1,35 @@ +// Molecule parser and transformer module +// +// This module provides a high-level declarative syntax for chemical structures. +// +// Example usage: +// #skeletize(molecule("CH3-CH2-OH")) // Ethanol +// #skeletize(molecule("@6(-=-=-=)")) // Benzene +// +// Supported syntax: +// - Atoms: C, H, O, N, Cl, etc. +// - Bonds: - (single), = (double), # (triple), > < (wedge), :> <: (dashed wedge) +// - Branches: (bond content) e.g., CH3-CH(-OH)-CH3 +// - Rings: @n e.g., @6 for hexagon, @5 for pentagon +// - Labels: :name e.g., CH3:start +// - Charges: ^+ ^- ^2+ ^3- e.g., NH4^+ +// - Isotopes: ^14C, ^235U +// +// Limitations: +// - Maximum nesting depth: ~11 levels due to Typst's recursion limit +// Deeply nested structures like "-(-(-(-(...)))) " beyond 11 levels will fail +// - This is a limitation of the parser combinator approach in Typst +// #import "parser.typ": alchemist-parser #import "transformer.typ": transform +/// Parse and transform a molecule string into alchemist elements. 
+/// +/// - content (string): The molecule string to parse +/// - name (string): Optional name for the molecule group +/// - ..args: Additional arguments (reserved for future use) +/// +/// Returns: Array of alchemist elements or error content #let molecule(content, name: none, ..args) = { let parsed = alchemist-parser(content) if not parsed.success { diff --git a/src/elements/molecule/parser.typ b/src/elements/molecule/parser.typ index 9df9c08..4ae67e8 100644 --- a/src/elements/molecule/parser.typ +++ b/src/elements/molecule/parser.typ @@ -48,6 +48,7 @@ */ #import "../../utils/parser-combinator.typ": * +#import "generator.typ": process-atom, calc-main-index // ==================== Utilities ==================== @@ -129,7 +130,10 @@ // key-value pair (e.g., color: red, angle: 45) #let key-value-pair-parser = seq( identifier, token(":"), some(none-of(")")), - map: parts => parts.join() + map: parts => { + let (id, colon, value) = parts + id + colon + value.join() + } ) #let options-parser = seq( @@ -246,33 +250,6 @@ math-text-parser, ) -// Atoms to math content processor -#let process-atom(parts) = { - let type = parts.type - - if type == "atoms" { - let base = parts.parts.map(process-atom) - if parts.charge != none { - (math.attach(base.join(), tr: eval("$" + parts.charge + "$")),) - } else { - base - } - } else if type == "abbreviation" { - text(parts.value) - } else if type == "math-text" { - eval(parts.value) - } else if type == "element-group" { - math.attach(parts.element, tl: [#parts.isotope], br: [#parts.subscript]) - } else if type == "parenthetical" { - let inner = process-atom(parts.atoms) - math.attach([(#inner.join())], br: [#parts.subscript]) - } else if type == "complex" { - let inner = process-atom(parts.atoms) - [\[#inner.join()\]] - } else { - "unkown type: " + type - } -} #let fragment-parser = seq( fragment-content-parser, optional(label-parser), optional(options-parser), @@ -282,7 +259,8 @@ type: "fragment", atoms: process-atom(content), name: 
label, - options: if options != none { options } else { (:) } + options: if options != none { options.pairs } else { (:) }, + main-index: calc-main-index(content), ) } ) @@ -311,7 +289,7 @@ type: "bond", symbol: symbol, name: label, - options: if options != none { options } else { (:) } + options: if options != none { options.pairs } else { (:) } ) } ) @@ -342,14 +320,10 @@ optional(options-parser), map: parts => { let (_, faces, mol, lbl, opts) = parts - mol = if mol != none { - let (_, mol, _) = mol - mol - } ( type: "cycle", faces: faces, - body: mol, + body: if mol != none { mol.at(1) } else { none }, label: lbl, options: opts ) diff --git a/src/elements/molecule/transformer.typ b/src/elements/molecule/transformer.typ index 87b415e..b177de7 100644 --- a/src/elements/molecule/transformer.typ +++ b/src/elements/molecule/transformer.typ @@ -1,5 +1,6 @@ -#import "iupac-angle.typ": bond-angle, branch-angles, initial-angle +#import "iupac-angle.typ": bond-angle, branch-angles, initial-angle, ring-angle, is-vertical-angle #import "generator.typ": * +#import "../links.typ": single #let init_state() = ( position: (), // Position in the molecule @@ -11,144 +12,290 @@ label_table: (:), // Label table for references ) +/// Get fragment's main-index (first non-H character index) from a unit +#let get-unit-main-index(unit) = { + if unit == none or unit.node == none or unit.node.type != "fragment" { + return none + } + unit.node.at("main-index", default: 0) +} + // ============================ Molecule ============================ #let transform_fragment(ctx, node) = { - generate_fragment(node) + let fragment = generate_fragment(node) + // Register label if present + if node.at("name", default: none) != none { + ctx.label_table.insert(node.name, fragment) + } + (ctx, fragment) } -#let transform_bond(ctx, bond) = { +#let transform_bond(ctx, bond, prev_unit: none, next_unit: none) = { let (ctx, angle) = bond-angle(ctx, bond) - // connecting points + // connecting points - merge 
with bond.options to preserve stroke: none etc. if ctx.parent_type == "cycle" { - return (ctx, generate_bond(bond, angle, (from: 0, to: 0))) + return (ctx, generate_bond(bond, angle, (from: 0, to: 0, ..bond.options))) + } + + // For vertical bonds, connect to main-index (first non-H character) + let options = bond.options + + if is-vertical-angle(angle) { + let prev_main = get-unit-main-index(prev_unit) + let next_main = get-unit-main-index(next_unit) + if prev_main != none { + options = (from: prev_main, ..options) + } + if next_main != none { + options = (to: next_main, ..options) + } } - (ctx, generate_bond(bond, angle, (:))) + (ctx, generate_bond(bond, angle, options)) +} + +#let transform_branch(ctx, branch, transform_molecule_fn, parent_unit: none) = { + // Get first unit of branch body for vertical bond detection + let first_unit = if branch.body != none and branch.body.type == "molecule" { + branch.body.first + } else { none } + let (ctx, bond) = transform_bond(ctx, branch.bond, prev_unit: parent_unit, next_unit: first_unit) + let branch_ctx = ctx + (parent_type: "unit") + let (branch_ctx, body) = transform_molecule_fn(branch_ctx, branch.body) + // Merge label_table back to parent context + ctx.label_table = branch_ctx.label_table + (ctx, generate_branch(bond, body)) } -#let transform_branch(ctx, branch, transform_molecule_fn) = { - let (ctx, bond) = transform_bond(ctx, branch.bond) - let body = transform_molecule_fn(ctx + (parent_type: "unit", ), branch.body) - generate_branch(bond, body) +/// Find positions of units that have inner rings +#let find-inner-ring-positions(mol) = { + if mol.type != "molecule" { return () } + let positions = () + if mol.first != none and mol.first.rings.len() > 0 { + positions.push(0) + } + for (idx, item) in mol.rest.enumerate() { + if item.unit != none and item.unit.rings.len() > 0 { + positions.push(idx + 1) + } + } + positions } -#let transform_cycle(ctx, cycle, transform_molecule_fn) = { - let body = if cycle.body == none 
{ - range(cycle.faces).map(i => single()).join() +/// Move rings from last unit to second-to-last unit (for hetero case) +#let move-rings-to-earlier-position(mol) = { + let rest = mol.rest + if rest.len() < 2 { return mol } + + let last_unit = rest.last().unit + if last_unit == none or last_unit.rings.len() == 0 { return mol } + + let second_last_unit = rest.at(-2).unit + let merged_rings = if second_last_unit != none and second_last_unit.rings != none { + (..second_last_unit.rings, ..last_unit.rings) } else { - transform_molecule_fn( - ctx + ( - parent_type: "cycle", - position: ctx.position + ((cycle.faces, 0),), - ), - cycle.body + last_unit.rings + } + + let new_second_last_unit = if second_last_unit != none { + (..second_last_unit, rings: merged_rings) + } else { + (type: "unit", node: (type: "implicit"), branches: (), rings: merged_rings) + } + + let new_rest = rest.slice(0, -2) + new_rest.push((..rest.at(-2), unit: new_second_last_unit)) + new_rest.push((..rest.at(-1), unit: (..last_unit, rings: ()))) + + (..mol, rest: new_rest) +} + +/// Insert invisible bonds at specified positions (for polycyclic case) +#let insert-invisible-bonds(mol, positions) = { + let new_rest = mol.rest + let invisible_entry = ( + bond: (type: "bond", symbol: "-", name: none, options: (stroke: none)), + unit: (type: "unit", node: (type: "implicit"), branches: (), rings: ()) + ) + for idx in positions.rev() { + new_rest = (..new_rest.slice(0, idx), invisible_entry, ..new_rest.slice(idx)) + } + (..mol, rest: new_rest) +} + +#let transform_cycle(ctx, cycle, transform_molecule_fn, angle: none, absolute: false) = { + let (body, cycle_ctx) = if cycle.body == none { + (range(cycle.faces).map(i => single()).join(), ctx) + } else { + let outer_body_len = if cycle.body.rest != none { cycle.body.rest.len() } else { 0 } + let cycle_ctx = ctx + ( + parent_type: "cycle", + position: ctx.position + ((cycle.faces, 0),), + outer_cycle_body_len: outer_body_len, + outer_cycle_faces: cycle.faces, ) + + 
let inner_ring_positions = find-inner-ring-positions(cycle.body) + let bonds_needed = cycle.faces - outer_body_len + + let modified_body = if bonds_needed == 0 and inner_ring_positions.len() > 0 { + // Hetero: move rings earlier so drawer processes them before face-count limit + move-rings-to-earlier-position(cycle.body) + } else if bonds_needed > 0 and inner_ring_positions.len() > 0 { + // Polycyclic: insert invisible bonds + let count = calc.min(bonds_needed, inner_ring_positions.len()) + insert-invisible-bonds(cycle.body, inner_ring_positions.slice(0, count)) + } else { + cycle.body + } + + let (cycle_ctx, transformed) = transform_molecule_fn(cycle_ctx, modified_body) + (transformed, cycle_ctx) } + // Merge label_table back to parent context + ctx.label_table = cycle_ctx.label_table + let hetero = () - if body.at(0).type == "fragment" { - hetero.push(body.at(0)) - body = body.slice(1) - } - if body.last().type == "fragment" { - hetero.push(body.last()) - body = body.slice(0, -1) + if type(body) == array and body.len() > 0 { + if body.at(0).type == "fragment" { + hetero.push(body.at(0)) + body = body.slice(1) + } + if body.len() > 0 and body.last().type == "fragment" { + hetero.push(body.last()) + body = body.slice(0, -1) + } } - (..hetero, generate_cycle(cycle, body)) + // Build cycle dict with angle in args (inline to preserve angle without changing generator.typ) + let cycle_dict = ( + type: "cycle", + faces: cycle.faces, + body: body, + args: if angle != none { + if absolute { (absolute: angle) } else { (relative: angle) } + } else { (:) }, + ) + + // All elements stay in body - drawer handles cycles/branches after links naturally + (ctx, (..hetero, cycle_dict)) } #let transform_unit(ctx, unit, transform_molecule_fn) = { - if unit == none { return none } - + if unit == none { return (ctx, ()) } + // Process the node let node = unit.node - let generated = if node != none { + let (ctx, generated) = if node != none { if node.type == "fragment" { 
transform_fragment(ctx, node) } else if node.type == "label-ref" { - generate_label_reference(node) + (ctx, generate_label_reference(node)) } else if node.type == "implicit" { // Implicit node, no action needed - () + (ctx, ()) } else { panic("Unknown node type: " + node.type + " for node: " + repr(node)) } } else { - () + (ctx, ()) } - + // Process branches let angles = branch-angles(ctx, unit.branches) - let branches = unit.branches.enumerate().zip(angles).map((((idx, branch), angle)) => { - transform_branch( - ctx + ( - parent_type: "branch", - position: ctx.position + ((unit.branches.len(), idx),), - current_angle: ctx.current_angle + angle, - ), + let branches = () + for ((idx, branch), angle) in unit.branches.enumerate().zip(angles) { + let branch_ctx = ctx + ( + parent_type: "branch", + position: ctx.position + ((unit.branches.len(), idx),), + current_angle: ctx.current_angle + angle, + ) + let (branch_ctx, branch_result) = transform_branch( + branch_ctx, branch, - transform_molecule_fn + transform_molecule_fn, + parent_unit: unit ) - }) + // Merge label_table back + ctx.label_table = branch_ctx.label_table + branches.push(branch_result) + } // Process rings - let rings = unit.rings.enumerate().map(((idx, ring)) => { - transform_cycle( - ctx + ( - parent_type: "cycle", - position: ctx.position + ((unit.rings.len(), idx),), - current_angle: ctx.current_angle, - ), + let rings = () + for (idx, ring) in unit.rings.enumerate() { + let (angle, absolute) = ring-angle(ctx, ring, unit.rings.len(), idx) + let ring_ctx = ctx + ( + parent_type: "cycle", + position: ctx.position + ((unit.rings.len(), idx),), + current_angle: if angle != none { angle } else { ctx.current_angle }, + ) + let (ring_ctx, ring_result) = transform_cycle( + ring_ctx, ring, - transform_molecule_fn + transform_molecule_fn, + angle: angle, + absolute: absolute, ) - }) + // Merge label_table back + ctx.label_table = ring_ctx.label_table + + rings.push(ring_result) + } - (..generated, 
..branches.join(), ..rings.join()) + (ctx, (..generated, ..branches.join(), ..rings.join())) } #let transform_molecule(ctx, molecule) = { - if molecule == none or molecule.type != "molecule" { return () } + if molecule == none or molecule.type != "molecule" { return (ctx, ()) } let chain_length = molecule.rest.len() let position = ctx.position + // Preserve current_angle when inside a branch (parent_type == "unit") + // Reset to initial-angle for top-level and cycle body + let base_angle = if ctx.parent_type == "unit" { + ctx.current_angle + } else { + initial-angle(ctx, molecule) + } ctx += ( - current_angle: initial-angle(ctx, molecule), + current_angle: base_angle, prev_bond: none, next_bond: if 0 < chain_length { molecule.rest.at(0).bond } else { none }, position: position + ((chain_length, 0),) ) // Transform first unit - let first = transform_unit( + let (ctx, first) = transform_unit( ctx, molecule.first, transform_molecule ) // Transform rest of chain - let rest = if molecule.rest != none and chain_length > 0 { + let rest = () + let prev_unit = molecule.first + if molecule.rest != none and chain_length > 0 { for (idx, item) in molecule.rest.enumerate() { let rest_ctx = ctx + ( prev_bond: ctx.next_bond, next_bond: if idx + 1 < chain_length { molecule.rest.at(idx + 1).bond } else { none }, position: position + ((chain_length, idx + 1),), ) - - let (rest_ctx, bond) = transform_bond(rest_ctx, item.bond) - let unit = transform_unit(rest_ctx, item.unit, transform_molecule) + + let (rest_ctx, bond) = transform_bond(rest_ctx, item.bond, prev_unit: prev_unit, next_unit: item.unit) + let (rest_ctx, unit) = transform_unit(rest_ctx, item.unit, transform_molecule) ctx = rest_ctx + prev_unit = item.unit - (..bond, ..unit) + rest += (..bond, ..unit) } - } else { - () } - (..first, ..rest) + (ctx, (..first, ..rest)) } // ============================ Reaction ============================ @@ -158,23 +305,29 @@ } #let transform_operator(ctx, operator) = { - 
generate_operator(operator) + (ctx, generate_operator(operator)) } #let transform_reaction(ctx, reaction) = { - reaction.terms.map(term => { + let result = () + for term in reaction.terms { if term.type == "term" { - transform_term(ctx, term.molecule) + let (ctx_new, transformed) = transform_term(ctx, term.molecule) + ctx = ctx_new + result.push(transformed) } else if term.type == "operator" { - (transform_operator(ctx, term),) + let (ctx_new, transformed) = transform_operator(ctx, term) + ctx = ctx_new + result.push((transformed,)) } else { panic("Unknown term type: " + term.type) } - }) + } + (ctx, result) } #let transform(reaction) = { let ctx = init_state() - - transform_reaction(ctx, reaction).join() + let (_, result) = transform_reaction(ctx, reaction) + result.join() } From 77d74c34c39e91f88633918dafec1b1a4903213b Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:07:02 +0900 Subject: [PATCH 28/30] feat: replace molecule with fragment in various test files for consistency --- tests/cetz-skeleton-anchors/test.typ | 8 +++---- tests/resonance/test.typ | 36 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/cetz-skeleton-anchors/test.typ b/tests/cetz-skeleton-anchors/test.typ index 0465c00..e4e5b63 100644 --- a/tests/cetz-skeleton-anchors/test.typ +++ b/tests/cetz-skeleton-anchors/test.typ @@ -5,17 +5,17 @@ #set page(width: auto, height: auto, margin: 0.5em) #let molecule-R1 = draw-skeleton(name: "mol1", mol-anchor: "east", { - molecule(name: "A", "H_2N") + fragment(name: "A", "H_2N") single() - molecule(name: "B", "CH") + fragment(name: "B", "CH") branch({ single(angle: 6) - molecule( + fragment( "R_1", ) }) single() - molecule(name: "cooh", "COOH") + fragment(name: "cooh", "COOH") }) #canvas({ diff --git a/tests/resonance/test.typ b/tests/resonance/test.typ index bc87a6a..16e3a20 100644 --- a/tests/resonance/test.typ +++ b/tests/resonance/test.typ @@ 
-8,14 +8,14 @@ config: (angle-increment: 15deg), { import cetz.draw: * - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("E") + fragment("E") }) branch({ double(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -24,7 +24,7 @@ ) }) single(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: -45deg), @@ -33,7 +33,7 @@ name: "to", ) single(angle: 2, name: "from") - molecule("H", name: "H") + fragment("H", name: "H") hobby( stroke: (red), (to: "from", rel: (0, 3pt)), @@ -42,7 +42,7 @@ mark: (end: ">", fill: red), ) plus(atom-sep: 5em) - molecule( + fragment( "B", lewis: ( lewis-double(angle: 180deg), @@ -64,14 +64,14 @@ r: "]", l: "[", { - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ double(angle: 6, name: "double") - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -82,7 +82,7 @@ }) branch({ single(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -102,14 +102,14 @@ operator(math.stretch(sym.arrow.r.l, size: 2em)) - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ single(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(), @@ -120,7 +120,7 @@ }) branch({ single(angle: -2, name: "single") - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -140,14 +140,14 @@ operator(math.stretch(sym.arrow.r.l, size: 2em)) - molecule("C") + fragment("C") branch({ single(angle: 14) - molecule("R") + fragment("R") }) branch({ single(angle: 6) - molecule( + fragment( "O", lewis: ( lewis-double(angle: 0), @@ -158,7 +158,7 @@ }) branch({ double(angle: -2) - molecule( + fragment( "O", lewis: ( lewis-double(angle: -135deg), From b880719d5763db21d7e639774533a7407328c717 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:07:22 +0900 Subject: [PATCH 29/30] feat: add edge case tests for molecule parsing and introduce 
.gitignore for molecule-ring tests --- tests/molecule-edge-cases/test.typ | 6 ++-- tests/molecule-integration/test.typ | 3 +- tests/molecule-ring/.gitignore | 4 +++ tests/molecule-ring/test.typ | 49 +++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 tests/molecule-ring/.gitignore create mode 100644 tests/molecule-ring/test.typ diff --git a/tests/molecule-edge-cases/test.typ b/tests/molecule-edge-cases/test.typ index 0c3848b..4eb53c2 100644 --- a/tests/molecule-edge-cases/test.typ +++ b/tests/molecule-edge-cases/test.typ @@ -28,7 +28,7 @@ Parsed successfully with #parsed.value.terms.len() nodes // #repr(reaction) #linebreak() - #repr(result) + // #repr(result) // #linebreak() ] } @@ -136,7 +136,9 @@ #test-parse("CH3-@6-CH3", "Ring in middle of chain") #test-parse("CH3-CH2-@6-CH2-CH3", "Ring embedded in chain") #test-parse("@6-@5", "Different sized rings connected") -#test-parse("@6-CH2-@6", "Rings connected by methylene") +#test-parse("@6--@5", "Different sized rings connected") +#test-parse("@6-(-@6)-(-@6)-@6", "Rings connected by methylene") +#test-parse("@3-(-@3)-(-@3)-@3", "Rings connected by methylene") // Invalid ring notation (expected parse error) #test-parse("@", "Asterisk without size") diff --git a/tests/molecule-integration/test.typ b/tests/molecule-integration/test.typ index 1ae9ed3..0824136 100644 --- a/tests/molecule-integration/test.typ +++ b/tests/molecule-integration/test.typ @@ -113,7 +113,8 @@ == Complex Natural Compounds (Simplified) === Caffeine Skeleton (Simplified) -#skeletize(molecule("@6((=O)-N(-)-(=O)-@5(-N(-)-=N-=)--N(-)-)")) +#skeletize(molecule("@6((=O)-N(-)-@5(-N=-N(-)-)=-(=O)-N(-)-)")) +#skeletize(molecule("@6((=O)-N(-)-@5(-N=-N(-)-=)-(=O)-N(-)-)")) == Pharmaceutical Skeleton (Simplified) diff --git a/tests/molecule-ring/.gitignore b/tests/molecule-ring/.gitignore new file mode 100644 index 0000000..40223be --- /dev/null +++ b/tests/molecule-ring/.gitignore @@ -0,0 +1,4 @@ +# generated 
by tytanic, do not edit + +diff/** +out/** diff --git a/tests/molecule-ring/test.typ b/tests/molecule-ring/test.typ new file mode 100644 index 0000000..47daeff --- /dev/null +++ b/tests/molecule-ring/test.typ @@ -0,0 +1,49 @@ +#import "../../lib.typ": * +#import "../../src/elements/molecule/parser.typ": alchemist-parser +#import "../../src/elements/molecule/transformer.typ": transform +#import "../../src/elements/molecule/molecule.typ": molecule + +// Error handling and edge cases test += Molecule Edge Cases and Error Handling Tests + +#let test-parse(input, description) = { + let parsed = alchemist-parser(input) + if not parsed.success { + return [ + == #description + #text(fill: red)[ + Failed to parse "#input": #parsed.error + ] + ] + } + + let reaction = parsed.value + let result = transform(reaction) + + [ + == #description + ✓ Input: #input + #skeletize(result) + #linebreak() + Parsed successfully with #parsed.value.terms.len() nodes + // #repr(reaction) + #linebreak() + // #repr(result) + // #linebreak() + ] +} + += Parser edge cases +#test-parse("@6((-)-(-)-(-)-(-)-(-)-(-)-)", "ring") +#test-parse("@6((-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-(-(-)-)-)", "ring") +#test-parse("@6(-(-CH3)(-CH3)-----)", "ring") +#test-parse("@6(-(-CH3)-----)", "ring") +#test-parse("CH3-@6-CH3", "ring") +#test-parse("@6-CH3", "ring") +#test-parse("CH3-@6", "ring") +#test-parse("@6(-----@6(-----))", "fused ring (5+5)") +#test-parse("@6(------@6(-----))", "fused ring (6+5)") +#test-parse("@6(-----@6(------))", "fused ring (5+6)") +#test-parse("@6(------@6(------))", "hetero ring (6+6)") + + \ No newline at end of file From 9651fe35bed738fed7fa9e73216c52c8755795a5 Mon Sep 17 00:00:00 2001 From: anko9801 <37263451+anko9801@users.noreply.github.com> Date: Wed, 17 Dec 2025 10:51:20 +0900 Subject: [PATCH 30/30] feat: adjust angle calculations for main chain branches to account for outgoing direction --- src/elements/molecule/iupac-angle.typ | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/src/elements/molecule/iupac-angle.typ b/src/elements/molecule/iupac-angle.typ index 93807dd..1a4aed9 100644 --- a/src/elements/molecule/iupac-angle.typ +++ b/src/elements/molecule/iupac-angle.typ @@ -127,8 +127,10 @@ ) // first branches of the main chain + // Offset by 180deg + zigzag to face opposite to outgoing direction if ctx.prev_bond == none and ctx.parent_type == none { - angles = angles.map(angle => angle + 180deg) + let outgoing = (IUPAC_ANGLES.zigzag)(idx + 1) + angles = angles.map(angle => angle + 180deg + outgoing) } return angles