From 514928175a9ca6508d278a7b3e0a31ccd86ed9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Sat, 10 Jan 2026 22:48:17 +0100 Subject: [PATCH 1/3] Replace `lazy_static!` with `LazyLock` --- Cargo.toml | 1 - src/braille.rs | 315 +++++++++++++++++++---------------------- src/canonicalize.rs | 151 +++++++++----------- src/chemistry.rs | 15 +- src/infer_intent.rs | 36 ++--- src/interface.rs | 27 ++-- src/lib.rs | 3 - src/prefs.rs | 5 +- src/speech.rs | 11 +- src/tts.rs | 23 ++- src/xpath_functions.rs | 15 +- tests/common/mod.rs | 7 +- 12 files changed, 273 insertions(+), 336 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bf013e23..63f6a518 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ sxd-document = "0.3" sxd-xpath = "0.4" yaml-rust = "0.4" # yaml-rust = { version = "0.11", package = "yaml-rust2" } -lazy_static = "1.4" strum = "0.27.2" strum_macros = "0.27.2" anyhow = "1.0" diff --git a/src/braille.rs b/src/braille.rs index 88817371..4d76ef83 100644 --- a/src/braille.rs +++ b/src/braille.rs @@ -13,6 +13,7 @@ use crate::speech::{BRAILLE_RULES, SpeechRulesWithContext, braille_replace_chars use crate::canonicalize::get_parent; use std::borrow::Cow; use std::ops::Range; +use std::sync::LazyLock; static UEB_PREFIXES: phf::Set = phf_set! { '⠼', '⠈', '⠘', '⠸', '⠐', '⠨', '⠰', '⠠', @@ -544,106 +545,99 @@ fn nemeth_cleanup(pref_manager: Ref, raw_braille: String) -> "↓" => "⠰", // subscript }; - lazy_static! { - // Add an English Letter indicator. This involves finding "single letters". 
- // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation: - // punctuation or whitespace on the left and right ignoring open/close chars - // https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf - static ref ADD_ENGLISH_LETTER_INDICATOR: Regex = - Regex::new(r"(?P^|W|P.[\u2800-\u28FF]?|,)(?P[\u2800-\u28FF]?⠷)?(?PC?L.)(?P[\u2800-\u28FF]?⠾)?(?PW|P|,|$)").unwrap(); + // Add an English Letter indicator. This involves finding "single letters". + // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation: + // punctuation or whitespace on the left and right ignoring open/close chars + // https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf + static ADD_ENGLISH_LETTER_INDICATOR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?P^|W|P.[\u2800-\u28FF]?|,)(?P[\u2800-\u28FF]?⠷)?(?PC?L.)(?P[\u2800-\u28FF]?⠾)?(?PW|P|,|$)").unwrap() + }); - // Trim braille spaces before and after braille indicators - // In order: fraction, /, cancellation, letter, baseline - // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" - static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = - Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap(); - static ref REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: Regex = - Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap(); - - // Hack to convert non-numeric '.' to numeric '.' - // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2. - static ref DOTS_99_A_2: Regex = Regex::new(r"𝑁⠨mN").unwrap(); - - // Punctuation is one or two chars. 
There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below - static ref REMOVE_SPACE_BEFORE_PUNCTUATION_151: Regex = - Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap(); - static ref REMOVE_SPACE_AFTER_PUNCTUATION_151: Regex = - Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap(); - - // Multipurpose indicator insertion - // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex) - - // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N - static ref MULTI_177_2: Regex = - Regex::new(r"([Ll].)[N𝑁]").unwrap(); - - // keep between numeric subscript and digit ('M' added by subscript rule) - static ref MULTI_177_3: Regex = - Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap(); - - // Add after decimal pt for non-digits except for comma and punctuation - // Note: since "." can be in the middle of a number, there is not necessarily a "N" - // Although not mentioned in 177_5, don't add an 'M' before an 'm' - static ref MULTI_177_5: Regex = - Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap(); - - - // Pattern for rule II.9a (add numeric indicator at start of line or after a space) - // 1. start of line - // 2. optional minus sign (⠤) - // 3. optional typeface indicator - // 4. 
number (N) - static ref NUM_IND_9A: Regex = - Regex::new(r"(?P^|[,Ww])(?P⠤?)N").unwrap(); - - // Needed after section mark(§), paragraph mark(¶), #, or * - static ref NUM_IND_9C: Regex = - Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap(); - - // Needed after section mark(§), paragraph mark(¶), #, or * - static ref NUM_IND_9D: Regex = - Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap(); - - // Needed after a typeface change or interior shape modifier indicator - static ref NUM_IND_9E: Regex = Regex::new(r"(?P[SB𝔹TIR]+?)N").unwrap(); - static ref NUM_IND_9E_SHAPE: Regex = Regex::new(r"(?P⠸⠫)N").unwrap(); - - // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d) - // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used - static ref NUM_IND_9F: Regex = Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap(); - - // Enclosed list exception - // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars). - // The green book says "at the start" of an item, don't add the numeric indicator. 
- // The NFB list exceptions after function abbreviations and angles, but what this really means is "after a space" - static ref NUM_IND_ENCLOSED_LIST: Regex = Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap(); - - // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…") - // Never use punctuation indicator before these (38-6) - // "…": "⠀⠄⠄⠄" - // "-": "⠸⠤" (hyphen and dash) - // ",": "⠠⠀" -- spacing already added - // Rule II.9b (add numeric indicator after punctuation [optional minus[optional .][digit] - // because this is run after the above rule, some cases are already caught, so don't - // match if there is already a numeric indicator - static ref NUM_IND_9B: Regex = Regex::new(r"(?PP..?)(?P⠤?)N").unwrap(); - - // Before 79b (punctuation) - static ref REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: Regex = Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap(); - - // Most commas have a space after them, but not when followed by a close quote (others?) - static ref NO_SPACE_AFTER_COMMA: Regex = Regex::new(r",P⠴").unwrap(); // captures both single and double close quote - static ref REMOVE_LEVEL_IND_BEFORE_BASELINE: Regex = Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap(); - - // Except for the four chars above, the unicode rules always include a punctuation indicator. 
- // The cases to remove them (that seem relevant to MathML) are: - // Beginning of line or after a space (V 38.1) - // After a word (38.4) - // 2nd or subsequent punctuation (includes, "-", etc) (38.7) - static ref REMOVE_AFTER_PUNCT_IND: Regex = Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap(); - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r"⠀⠀+").unwrap(); - } + // Trim braille spaces before and after braille indicators + // In order: fraction, /, cancellation, letter, baseline + // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" + static REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: LazyLock = + LazyLock::new(|| Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap()); + static REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: LazyLock = + LazyLock::new(|| Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap()); + + // Hack to convert non-numeric '.' to numeric '.' + // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2. + static DOTS_99_A_2: LazyLock = LazyLock::new(|| Regex::new(r"𝑁⠨mN").unwrap()); + + // Punctuation is one or two chars. 
There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below + static REMOVE_SPACE_BEFORE_PUNCTUATION_151: LazyLock = + LazyLock::new(|| Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap()); + static REMOVE_SPACE_AFTER_PUNCTUATION_151: LazyLock = + LazyLock::new(|| Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap()); + + // Multipurpose indicator insertion + // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex) + + // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N + static MULTI_177_2: LazyLock = LazyLock::new(|| Regex::new(r"([Ll].)[N𝑁]").unwrap()); + + // keep between numeric subscript and digit ('M' added by subscript rule) + static MULTI_177_3: LazyLock = LazyLock::new(|| Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap()); + + // Add after decimal pt for non-digits except for comma and punctuation + // Note: since "." can be in the middle of a number, there is not necessarily a "N" + // Although not mentioned in 177_5, don't add an 'M' before an 'm' + static MULTI_177_5: LazyLock = + LazyLock::new(|| Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap()); + + // Pattern for rule II.9a (add numeric indicator at start of line or after a space) + // 1. start of line + // 2. optional minus sign (⠤) + // 3. optional typeface indicator + // 4. 
number (N) + static NUM_IND_9A: LazyLock = LazyLock::new(|| Regex::new(r"(?P^|[,Ww])(?P⠤?)N").unwrap()); + + // Needed after section mark(§), paragraph mark(¶), #, or * + static NUM_IND_9C: LazyLock = LazyLock::new(|| Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap()); + + // Needed after section mark(§), paragraph mark(¶), #, or * + static NUM_IND_9D: LazyLock = LazyLock::new(|| Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap()); + + // Needed after a typeface change or interior shape modifier indicator + static NUM_IND_9E: LazyLock = LazyLock::new(|| Regex::new(r"(?P[SB𝔹TIR]+?)N").unwrap()); + static NUM_IND_9E_SHAPE: LazyLock = LazyLock::new(|| Regex::new(r"(?P⠸⠫)N").unwrap()); + + // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d) + // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used + static NUM_IND_9F: LazyLock = LazyLock::new(|| Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap()); + + // Enclosed list exception + // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars). + // The green book says "at the start" of an item, don't add the numeric indicator. 
+ // The NFB list exceptions after function abbreviations and angles, but what this really means is "after a space" + static NUM_IND_ENCLOSED_LIST: LazyLock = LazyLock::new(|| Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap()); + + // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…") + // Never use punctuation indicator before these (38-6) + // "…": "⠀⠄⠄⠄" + // "-": "⠸⠤" (hyphen and dash) + // ",": "⠠⠀" -- spacing already added + // Rule II.9b (add numeric indicator after punctuation [optional minus[optional .][digit] + // because this is run after the above rule, some cases are already caught, so don't + // match if there is already a numeric indicator + static NUM_IND_9B: LazyLock = LazyLock::new(|| Regex::new(r"(?PP..?)(?P⠤?)N").unwrap()); + + // Before 79b (punctuation) + static REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: LazyLock = LazyLock::new(|| Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap()); + + // Most commas have a space after them, but not when followed by a close quote (others?) + static NO_SPACE_AFTER_COMMA: LazyLock = LazyLock::new(|| Regex::new(r",P⠴").unwrap()); // captures both single and double close quote + static REMOVE_LEVEL_IND_BEFORE_BASELINE: LazyLock = LazyLock::new(|| Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap()); + + // Except for the four chars above, the unicode rules always include a punctuation indicator. 
+ // The cases to remove them (that seem relevant to MathML) are: + // Beginning of line or after a space (V 38.1) + // After a word (38.4) + // 2nd or subsequent punctuation (includes, "-", etc) (38.7) + static REMOVE_AFTER_PUNCT_IND: LazyLock = LazyLock::new(|| Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap()); + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap()); + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap()); // debug!("Before: \"{}\"", raw_braille); // replacements might overlap at boundaries (e.g., whitespace) -- need to repeat @@ -849,15 +843,13 @@ static LETTER_PREFIXES: phf::Set = phf_set! { 'B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', '𝑐', }; -lazy_static! { - // Trim braille spaces before and after braille indicators - // In order: fraction, /, cancellation, letter, baseline - // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" - // static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = - // Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap(); - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb])").unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r"⠀⠀+").unwrap(); -} +// Trim braille spaces before and after braille indicators +// In order: fraction, /, cancellation, letter, baseline +// Note: fraction over is not listed due to example 42(4) which shows a space before the "/" +// static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = +// Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap(); +static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb])").unwrap()); +static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap()); fn is_short_form(chars: &[char]) -> bool { let chars_as_string = chars.iter().map(|ch| ch.to_string()).collect::(); @@ -1115,9 +1107,7 @@ fn ueb_cleanup(pref_manager: Ref, raw_braille: String) -> Str 
} fn typeface_to_word_mode(braille: &str) -> String { - lazy_static! { - static ref HAS_TYPEFACE: Regex = Regex::new("[BI𝔹STD]").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new("[BI𝔹STD]").unwrap()); // debug!("before typeface fix: '{}'", braille); let mut result = "".to_string(); @@ -1717,8 +1707,7 @@ fn handle_contractions(chars: &[char], mut result: String) -> String { } // It would be much better from an extensibility point of view to read the table in from a file - lazy_static! { - static ref CONTRACTIONS: Vec = vec![ + static CONTRACTIONS: LazyLock> = LazyLock::new(|| { vec![ // 10.3: Strong contractions Replacement{ pattern: to_unicode_braille("and"), replacement: "L⠯"}, Replacement{ pattern: to_unicode_braille("for"), replacement: "L⠿"}, @@ -1759,11 +1748,11 @@ fn handle_contractions(chars: &[char], mut result: String) -> String { Replacement{ pattern: to_unicode_braille("en"), replacement: "⠢"}, Replacement{ pattern: to_unicode_braille("in"), replacement: "⠔"}, - ]; + ] + }); - static ref CONTRACTION_PATTERNS: RegexSet = init_patterns(&CONTRACTIONS); - static ref CONTRACTION_REGEX: Vec = init_regex(&CONTRACTIONS); - } + static CONTRACTION_PATTERNS: LazyLock = LazyLock::new(|| init_patterns(&CONTRACTIONS)); + static CONTRACTION_REGEX: LazyLock> = LazyLock::new(|| init_regex(&CONTRACTIONS)); let mut chars_as_str = chars.iter().collect::(); // debug!(" handle_contractions: examine '{}'", &chars_as_str); @@ -1838,20 +1827,17 @@ static VIETNAM_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { }; fn vietnam_cleanup(pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - // Deal with Vietnamese "rhymes" -- moving accents around - // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement - // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules - // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" is dropped later - static ref QU_GI_RULE_EXCEPTION: Regex = Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap(); - static ref IUOY_E_RULE: Regex = Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap(); // ie, ue, oe, and ye rule - static ref UO_A_RULE: Regex = Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap(); // ua, oa rule - static ref UU_O_RULE: Regex = Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap(); // uo, ưo rule - static ref UYE_RULE: Regex = Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap(); // uo, ưo rule - static ref UY_RULE: Regex = Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap(); // uo, ưo rule - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap(); - - } + // Deal with Vietnamese "rhymes" -- moving accents around + // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement + // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules + // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" 
is dropped later + static QU_GI_RULE_EXCEPTION: LazyLock = LazyLock::new(|| Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap()); + static IUOY_E_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap()); // ie, ue, oe, and ye rule + static UO_A_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap()); // ua, oa rule + static UU_O_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap()); // uo, ưo rule + static UYE_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap()); // uye rule + static UY_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap()); // uy rule + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap()); // debug!("vietnam_cleanup: start={}", raw_braille); let result = typeface_to_word_mode(&raw_braille); let result = capitals_to_word_mode(&result); @@ -1946,9 +1932,7 @@ static CMU_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { fn cmu_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! { - static ref ADD_WHITE_SPACE: Regex = Regex::new(r"𝘄(.)|𝘄$").unwrap(); - } + static ADD_WHITE_SPACE: LazyLock = LazyLock::new(|| Regex::new(r"𝘄(.)|𝘄$").unwrap()); // debug!("cmu_cleanup: start={}", raw_braille); // let result = typeface_to_word_mode(&raw_braille); @@ -2082,12 +2066,10 @@ static FINNISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { }; fn finnish_cleanup(pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap(); - // Numbers need to end with a space, but sometimes there is one there for other reasons - static ref DROP_NUMBER_SEPARATOR: Regex = Regex::new(r"(n.)\)").unwrap(); - static ref NUMBER_MATCH: Regex = Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap(); - } + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap()); + // Numbers need to end with a space, but sometimes there is one there for other reasons + static DROP_NUMBER_SEPARATOR: LazyLock = LazyLock::new(|| Regex::new(r"(n.)\)").unwrap()); + static NUMBER_MATCH: LazyLock = LazyLock::new(|| Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap()); // debug!("finnish_cleanup: start={}", raw_braille); let result = DROP_NUMBER_SEPARATOR.replace_all(&raw_braille, |cap: &Captures| { @@ -2146,10 +2128,8 @@ fn finnish_cleanup(pref_manager: Ref, raw_braille: String) -> fn swedish_cleanup(pref_manager: Ref, raw_braille: String) -> String { // FIX: need to implement this -- this is just a copy of the Vietnam code - lazy_static! { - // Empty bases are ok if they follow whitespace - static ref EMPTY_BASE: Regex = Regex::new(r"(^|[W𝐖w])E").unwrap(); - } + // Empty bases are ok if they follow whitespace + static EMPTY_BASE: LazyLock = LazyLock::new(|| Regex::new(r"(^|[W𝐖w])E").unwrap()); // debug!("swedish_cleanup: start={}", raw_braille); let result = typeface_to_word_mode(&raw_braille); let result = capitals_to_word_mode(&result); @@ -2195,10 +2175,8 @@ fn swedish_cleanup(pref_manager: Ref, raw_braille: String) -> #[allow(non_snake_case)] fn LaTeX_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - static ref REMOVE_SPACE: Regex =Regex::new(r" ([\^_,;)\]}])").unwrap(); // '^', '_', ',', ';', ')', ']', '}' - static ref COLLAPSE_SPACES: Regex = Regex::new(r" +").unwrap(); - } + static REMOVE_SPACE: LazyLock = LazyLock::new(|| Regex::new(r" ([\^_,;)\]}])").unwrap()); // '^', '_', ',', ';', ')', ']', '}' + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r" +").unwrap()); // debug!("LaTeX_cleanup: start={}", raw_braille); let result = raw_braille.replace('𝐖', " "); // let result = COLLAPSE_SPACES.replace_all(&raw_braille, "⠀"); @@ -2214,11 +2192,9 @@ fn LaTeX_cleanup(_pref_manager: Ref, raw_braille: String) -> #[allow(non_snake_case)] fn ASCIIMath_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! { - static ref REMOVE_SPACE_BEFORE_OP: Regex = Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap(); - static ref REMOVE_SPACE_AFTER_OP: Regex = Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r" +").unwrap(); - } + static REMOVE_SPACE_BEFORE_OP: LazyLock = LazyLock::new(|| Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap()); + static REMOVE_SPACE_AFTER_OP: LazyLock = LazyLock::new(|| Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap()); + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r" +").unwrap()); // debug!("ASCIIMath_cleanup: start={}", raw_braille); let result = raw_braille.replace("|𝐖__|", "|𝐰__|"); // protect the whitespace to prevent misinterpretation as lfloor let result = result.replace('𝐖', " "); @@ -2369,14 +2345,13 @@ impl BrailleChars { } fn get_braille_nemeth_chars(node: Element, text_range: Option>) -> Result { - lazy_static! 
{ - // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them: - // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman - // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian - // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?P[SB𝔹TIR]*)(?P[EDGVHU]?)(?PC?)(?PL?)(?P[N]?)(?P.)").unwrap(); - } + // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them: + // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman + // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian + // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?P[SB𝔹TIR]*)(?P[EDGVHU]?)(?PC?)(?PL?)(?P[N]?)(?P.)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); // FIX: cover all the options -- use phf::Map let attr_typeface = match math_variant { @@ -2448,11 +2423,10 @@ impl BrailleChars { // this routine merely deals with the mathvariant attr. // Canonicalize has already transformed all chars it can to math alphanumerics, but not all have bold/italic // The typeform/caps transforms to (potentially) word mode are handled later. - lazy_static! 
{ - static ref HAS_TYPEFACE: Regex = Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap(); - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap()); + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); let text = BrailleChars::substring(as_text(node), &text_range); @@ -2502,11 +2476,10 @@ impl BrailleChars { // In CMU, we need to replace spaces used for number blocks with "." // For other numbers, we need to add "." to create digit blocks - lazy_static! { - static ref HAS_TYPEFACE: Regex = Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap(); - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap()); + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); let text = BrailleChars::substring(as_text(node), &text_range); diff --git a/src/canonicalize.rs b/src/canonicalize.rs index 7dadd054..15ee4382 100644 --- a/src/canonicalize.rs +++ b/src/canonicalize.rs @@ -20,6 +20,7 @@ use std::fmt; use crate::chemistry::*; use unicode_script::Script; use roman_numerals_rs::RomanNumeral; +use std::sync::LazyLock; // FIX: DECIMAL_SEPARATOR should be set by env, or maybe language const DECIMAL_SEPARATOR: &str = "."; @@ -48,60 +49,58 @@ static AMBIGUOUS_OPERATORS: phf::Set<&str> = phf_set! 
{ }; // static vars used when canonicalizing -lazy_static!{ - // lowest priority operator so it is never popped off the stack - static ref LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None }; +// lowest priority operator so it is never popped off the stack +static LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None }; - static ref INVISIBLE_FUNCTION_APPLICATION: &'static OperatorInfo = OPERATORS.get("\u{2061}").unwrap(); - static ref IMPLIED_TIMES: &'static OperatorInfo = OPERATORS.get("\u{2062}").unwrap(); - static ref IMPLIED_INVISIBLE_COMMA: &'static OperatorInfo = OPERATORS.get("\u{2063}").unwrap(); - static ref IMPLIED_INVISIBLE_PLUS: &'static OperatorInfo = OPERATORS.get("\u{2064}").unwrap(); +static INVISIBLE_FUNCTION_APPLICATION: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2061}").unwrap()); +static IMPLIED_TIMES: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2062}").unwrap()); +static IMPLIED_INVISIBLE_COMMA: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2063}").unwrap()); +static IMPLIED_INVISIBLE_PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2064}").unwrap()); - // FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)? - static ref PLUS: &'static OperatorInfo = OPERATORS.get("+").unwrap(); - static ref MINUS: &'static OperatorInfo = OPERATORS.get("-").unwrap(); - static ref PREFIX_MINUS: &'static OperatorInfo = MINUS.next.as_ref().unwrap(); +// FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)? 
+static PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("+").unwrap()); +static MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("-").unwrap()); +static PREFIX_MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| MINUS.next.as_ref().unwrap()); - static ref TIMES_SIGN: &'static OperatorInfo = OPERATORS.get("×").unwrap(); +static TIMES_SIGN: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("×").unwrap()); - // IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority - static ref IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 851, next: &None - }; - // IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC - static ref IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 901, next: &None - }; - // IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds - static ref IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 905, next: &None - }; - static ref IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4 - op_type: OperatorTypes::INFIX, priority: 881, next: &None - }; +// IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority +static IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 851, next: &None +}; +// IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC +static IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 901, next: &None +}; +// IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds +static IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 905, next: &None +}; +static 
IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4 + op_type: OperatorTypes::INFIX, priority: 881, next: &None +}; - // Useful static defaults to have available if there is no character match - static ref DEFAULT_OPERATOR_INFO_PREFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::PREFIX, priority: 260, next: &None - }; - static ref DEFAULT_OPERATOR_INFO_INFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 260, next:& None - }; - static ref DEFAULT_OPERATOR_INFO_POSTFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::POSTFIX, priority: 260, next: &None - }; +// Useful static defaults to have available if there is no character match +static DEFAULT_OPERATOR_INFO_PREFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::PREFIX, priority: 260, next: &None +}; +static DEFAULT_OPERATOR_INFO_INFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 260, next:& None +}; +static DEFAULT_OPERATOR_INFO_POSTFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::POSTFIX, priority: 260, next: &None +}; - // avoids having to use Option in some cases - static ref ILLEGAL_OPERATOR_INFO: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 999, next: &None - }; +// avoids having to use Option in some cases +static ILLEGAL_OPERATOR_INFO: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 999, next: &None +}; - // used to tell if an operator is a relational operator - static ref EQUAL_PRIORITY: usize = OPERATORS.get("=").unwrap().priority; +// used to tell if an operator is a relational operator +static EQUAL_PRIORITY: LazyLock = LazyLock::new(|| OPERATORS.get("=").unwrap().priority); - // useful for detecting whitespace - static ref IS_WHITESPACE: Regex = Regex::new(r"^\s+$").unwrap(); // only Unicode whitespace -} +// useful for detecting whitespace +static IS_WHITESPACE: LazyLock = 
LazyLock::new(|| Regex::new(r"^\s+$").unwrap()); // only Unicode whitespace // Operators are either PREFIX, INFIX, or POSTFIX, but can also have other properties such as LEFT_FENCE bitflags! { @@ -296,7 +295,7 @@ impl<'a, 'op:'a> StackInfo<'a, 'op> { // debug!(" adding '{}' to mrow[{}], operator '{}/{}'", // element_summary(child), self.mrow.children().len(), show_invisible_op_char(child_op.ch), child_op.op.priority); self.mrow.append_child(child); - if ptr_eq(child_op.op, *ILLEGAL_OPERATOR_INFO) { + if ptr_eq(child_op.op, &ILLEGAL_OPERATOR_INFO) { assert!(!self.is_operand); // should not have two operands in a row (ok to add whitespace) self.is_operand = true; } else { @@ -445,12 +444,10 @@ static EMPTY_ELEMENTS: phf::Set<&str> = phf_set! { "mspace", "none", "mprescripts", "mglyph", "malignmark", "maligngroup", "msline", }; -lazy_static! { - // turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match - // two cases because we don't want to have a match for 'Cl', etc. - static ref UPPER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap(); - static ref LOWER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap(); -} +// turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match +// two cases because we don't want to have a match for 'Cl', etc. +static UPPER_ROMAN_NUMERAL: LazyLock = LazyLock::new(|| Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap()); +static LOWER_ROMAN_NUMERAL: LazyLock = LazyLock::new(|| Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap()); struct CanonicalizeContextPatterns { @@ -741,12 +738,10 @@ impl CanonicalizeContext { /// Returns 'None' if the element should not be in the tree. 
fn clean_mathml<'a>(&self, mathml: Element<'a>) -> Option> { // Note: this works bottom-up (clean the children first, then this element) - lazy_static! { - static ref IS_PRIME: Regex = Regex::new(r"['′″‴⁗]").unwrap(); + static IS_PRIME: LazyLock = LazyLock::new(|| Regex::new(r"['′″‴⁗]").unwrap()); - // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _") - static ref IS_UNDERSCORE: Regex = Regex::new(r"^[_\u{00A0}]+$").unwrap(); - } + // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _") + static IS_UNDERSCORE: LazyLock = LazyLock::new(|| Regex::new(r"^[_\u{00A0}]+$").unwrap()); static CURRENCY_SYMBOLS: phf::Set<&str> = phf_set! { @@ -1267,9 +1262,7 @@ impl CanonicalizeContext { fn is_proportional_before_colon<'a>(siblings: impl Iterator>) -> Option { // unparsed, so we look at relative priorities to make sure the proportional operator is really the next operator - lazy_static!{ - static ref PROPORTIONAL_PRIORITY: usize = OPERATORS.get("∷").unwrap().priority; - } + static PROPORTIONAL_PRIORITY: LazyLock = LazyLock::new(|| OPERATORS.get("∷").unwrap().priority); for sibling in siblings { let child = as_element(*sibling); if name(child) == "mo" { @@ -1387,9 +1380,7 @@ impl CanonicalizeContext { /// looks for pairs of (letter, pseudoscript) such as x' or p'q' all inside of a single token element fn split_apart_pseudo_scripts<'a>(mi: Element<'a>) -> Option> { - lazy_static!{ - static ref IS_DEGREES_C_or_F: Regex = Regex::new(r"[°º][CF]").unwrap(); - } + static IS_DEGREES_C_or_F: LazyLock = LazyLock::new(|| Regex::new(r"[°º][CF]").unwrap()); let text = as_text(mi); // debug!("split_apart_pseudo_scripts: start text=\"{text}\""); @@ -1980,9 +1971,7 @@ impl CanonicalizeContext { /// under some specific conditions (trying to be a little cautious). 
/// The returned (mrow) element reuses the arg so tree siblings links remain correct. fn split_points(leaf: Element) -> Option { - lazy_static!{ - static ref IS_UPPERCASE: Regex = Regex::new(r"^[A-Z]+$").unwrap(); - } + static IS_UPPERCASE: LazyLock = LazyLock::new(|| Regex::new(r"^[A-Z]+$").unwrap()); if !IS_UPPERCASE.is_match(as_text(leaf)) { return None; @@ -2186,9 +2175,7 @@ impl CanonicalizeContext { // Check if start..end is a number fn is_likely_a_number(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool { - lazy_static! { - static ref IS_HEX_BLOCK: Regex = Regex::new("[a-eh-z]").unwrap(); - } + static IS_HEX_BLOCK: LazyLock = LazyLock::new(|| Regex::new("[a-eh-z]").unwrap()); // Note: the children of math_or_mrow aren't valid ('children' represents the current state) let end = children.len(); // { @@ -3274,7 +3261,7 @@ impl CanonicalizeContext { } fn canonicalize_mo_text(&self, mo: Element) { - // lazy_static! { + // lazy_static! { (NOTE: std::sync::LazyLock is now used instead) // static ref IS_LIKELY_SCALAR_VARIABLE: Regex = Regex::new("[a-eh-z]").unwrap(); // } @@ -3367,7 +3354,7 @@ impl CanonicalizeContext { }; let found_op_info = if mo_node.attribute_value(CHEMICAL_BOND).is_some() { - Some(&*IMPLIED_CHEMICAL_BOND) + Some(&IMPLIED_CHEMICAL_BOND) } else { OPERATORS.get(as_text(mo_node)) }; @@ -3378,7 +3365,7 @@ impl CanonicalizeContext { let found_op_info = found_op_info.unwrap(); let matching_op_info = find_operator_info(found_op_info, op_type, form.is_some()); - if ptr_eq(matching_op_info, *ILLEGAL_OPERATOR_INFO) { + if ptr_eq(matching_op_info, &ILLEGAL_OPERATOR_INFO) { return op_not_in_operator_dictionary(op_type); } else { return matching_op_info; @@ -4128,7 +4115,7 @@ impl CanonicalizeContext { } return false; } - return ptr_eq(op_on_top.op, &*IMPLIED_TIMES_HIGH_PRIORITY); + return ptr_eq(op_on_top.op, &IMPLIED_TIMES_HIGH_PRIORITY); fn is_trig(node: Element) -> bool { let base_of_name = 
get_possible_embellished_node(node); @@ -4238,17 +4225,17 @@ impl CanonicalizeContext { base_of_child.remove_attribute("data-was-mo"); set_mathml_name(base_of_child, "mo"); let mut top_of_stack = parse_stack.pop().unwrap(); - top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: &INVISIBLE_FUNCTION_APPLICATION}); // whitespace -- make part of mrow to keep out of parse + top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: *INVISIBLE_FUNCTION_APPLICATION}); // whitespace -- make part of mrow to keep out of parse parse_stack.push(top_of_stack); continue; } // consecutive operands -- add an invisible operator as appropriate current_op = if likely_function_name == FunctionNameCertainty::True { - OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION } + OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION } } else if self.is_mixed_fraction(previous_child, &children[i_child..])? { - OperatorPair{ ch: "\u{2064}", op: &IMPLIED_INVISIBLE_PLUS } + OperatorPair{ ch: "\u{2064}", op: *IMPLIED_INVISIBLE_PLUS } } else if self.is_implied_comma(previous_child, current_child, mrow) { - OperatorPair{ch: "\u{2063}", op: &IMPLIED_INVISIBLE_COMMA } + OperatorPair{ch: "\u{2063}", op: *IMPLIED_INVISIBLE_COMMA } } else if self.is_implied_chemical_bond(previous_child, current_child) { OperatorPair{ch: "\u{2063}", op: &IMPLIED_CHEMICAL_BOND } } else if self.is_implied_separator(previous_child, current_child) { @@ -4256,7 +4243,7 @@ impl CanonicalizeContext { } else if self.is_trig_arg(base_of_previous_child, base_of_child, &mut parse_stack) { OperatorPair{ch: "\u{2062}", op: &IMPLIED_TIMES_HIGH_PRIORITY } } else { - OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES } + OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES } }; if let Some(attr_val) = base_of_child.attribute_value(CHANGED_ATTR) { if attr_val == "data-was-mo" { @@ -4288,16 +4275,16 @@ impl CanonicalizeContext { } } - if !ptr_eq(current_op.op, 
*ILLEGAL_OPERATOR_INFO) { + if !ptr_eq(current_op.op, &ILLEGAL_OPERATOR_INFO) { if current_op.op.is_left_fence() || current_op.op.is_prefix() { if top(&parse_stack).is_operand { // will end up with operand operand -- need to choose operator associated with prev child // we use the original input here because in this case, we need to look to the right of the ()s to deal with chemical states let likely_function_name = self.is_function_name(as_element(children[i_child-1]), Some(&children[i_child..])); let implied_operator = if likely_function_name== FunctionNameCertainty::True { - OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION } + OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION } } else { - OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES } + OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES } }; // debug!(" adding implied {}", if ptr_eq(implied_operator.op,*IMPLIED_TIMES) {"times"} else {"function apply"}); diff --git a/src/chemistry.rs b/src/chemistry.rs index 8d2f625d..0e9876c3 100644 --- a/src/chemistry.rs +++ b/src/chemistry.rs @@ -45,6 +45,7 @@ use phf::{phf_map, phf_set}; use std::convert::TryInto; use std::collections::HashSet; use std::cmp::Ordering; +use std::sync::LazyLock; use crate::errors::*; @@ -939,10 +940,8 @@ fn likely_chem_superscript(sup: Element) -> isize { // bullet is radical (en.wikipedia.org/wiki/Radical_(chemistry)#Depiction_in_chemical_reactions); mhchem uses dot operator // these can stand alone, be followed by +/- or have a number in front "(2•)-"" [examples from mhchem documentation] // roman numerals are "oxidation state" and range from -4 to +9 - lazy_static! 
{ - static ref MULTIPLE_PLUS_OR_MINUS_OR_DOT: Regex = Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap(); - static ref SINGLE_PLUS_OR_MINUS_OR_DOT: Regex = Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap(); - } + static MULTIPLE_PLUS_OR_MINUS_OR_DOT: LazyLock = LazyLock::new(|| Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap()); + static SINGLE_PLUS_OR_MINUS_OR_DOT: LazyLock = LazyLock::new(|| Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap()); static DOTS: &[char; 3] = &['⋅', '∙', '•']; let sup_name = name(sup); if sup_name == "mo" && MULTIPLE_PLUS_OR_MINUS_OR_DOT.is_match(as_text(sup)) { @@ -1617,11 +1616,9 @@ fn is_equilibrium_constant(mut mathml: Element) -> bool { return name(mathml) == "mi" && as_text(mathml) == "K"; } -lazy_static! { - // Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals. - // All instances seem to be upper case that I've seen. - static ref SMALL_UPPER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap(); -} +// Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals. +// All instances seem to be upper case that I've seen. +static SMALL_UPPER_ROMAN_NUMERAL: LazyLock = LazyLock::new(|| Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap()); /// look for "(s), "(l)", "(g)", "(aq)" (could also use [...]) /// this might be called before canonicalization, but in clean_chemistry_mrow, we made sure "( xxx )" is grouped properly diff --git a/src/infer_intent.rs b/src/infer_intent.rs index 7da65768..1e3b685c 100644 --- a/src/infer_intent.rs +++ b/src/infer_intent.rs @@ -15,6 +15,7 @@ use crate::pretty_print::mml_to_string; use crate::xpath_functions::is_leaf; use regex::Regex; use phf::phf_set; +use std::sync::LazyLock; const IMPLICIT_FUNCTION_NAME: &str = "apply-function"; @@ -243,23 +244,24 @@ pub fn intent_speech_for_name(intent_name: &str, verbosity: &str, fixity: &str) // property := S ':' NCName // S := [ \t\n\r]* -lazy_static! 
{ - // The practical restrictions of NCName are that it cannot contain several symbol characters like - // !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters - // Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName. - // NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated - // We follow NC_NAME for the basic latin block, but then allow everything - static ref CONCEPT_OR_LITERAL: Regex = Regex::new( - r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // NC_NAME but simpler - ).unwrap(); - static ref PROPERTY: Regex = Regex::new( - r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // : NC_NAME - ).unwrap(); - static ref ARG_REF: Regex = Regex::new( - r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // $ NC_NAME - ).unwrap(); - static ref NUMBER: Regex = Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap(); -} +// The practical restrictions of NCName are that it cannot contain several symbol characters like +// !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters +// Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName. 
+// NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated +// We follow NC_NAME for the basic latin block, but then allow everything +static CONCEPT_OR_LITERAL: LazyLock = LazyLock::new(|| { Regex::new( + r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#, // NC_NAME but simpler + ).unwrap() +}); +static PROPERTY: LazyLock = LazyLock::new(|| { Regex::new( + r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#, // : NC_NAME + ).unwrap() +}); +static ARG_REF: LazyLock = LazyLock::new(|| { Regex::new( + r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#, // $ NC_NAME + ).unwrap() +}); +static NUMBER: LazyLock = LazyLock::new(|| Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap()); static TERMINALS_AS_U8: [u8; 3] = [b'(', b',', b')']; // static TERMINALS: [char; 3] = ['(', ',',')']; diff --git a/src/interface.rs b/src/interface.rs index 2d92c2d0..c5b69634 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -18,6 +18,7 @@ use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_wit use crate::navigate::*; use crate::pretty_print::mml_to_string; use crate::xpath_functions::{is_leaf, IsNode}; +use std::sync::LazyLock; #[cfg(feature = "enable-logs")] use std::sync::Once; @@ -91,14 +92,12 @@ pub fn get_version() -> String { /// The ids can be used for sync highlighting if the `Bookmark` API preference is true. pub fn set_mathml(mathml_str: impl AsRef) -> Result { enable_logs(); - lazy_static! 
{ - // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822) - static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap(); - static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap(); - static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap(); // very limited namespace prefix match - static ref PREFIX: Regex = Regex::new(r#"( = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap()); + static MATHJAX_V3: LazyLock = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap()); + static NAMESPACE_DECL: LazyLock = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap()); // very limited namespace prefix match + static PREFIX: LazyLock = LazyLock::new(|| Regex::new(r#"( = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]+?);"#).unwrap()); NAVIGATION_STATE.with(|nav_stack| { nav_stack.borrow_mut().reset(); @@ -668,9 +667,7 @@ pub fn trim_element(e: Element, allow_structure_in_leaves: bool) { // space, tab, newline, carriage return all get collapsed to a single space const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}']; - lazy_static! { - static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap(); - } + static WHITESPACE_MATCH: LazyLock = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap()); if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) { // Assume it is HTML inside of the leaf -- turn the HTML into a string @@ -1106,9 +1103,7 @@ mod tests { set_mathml("𝕞").unwrap(); // need to remove unique ids - lazy_static! { - static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap(); - } + static ID_MATCH: LazyLock = LazyLock::new(|| Regex::new(r#"id='.+?' 
"#).unwrap()); let entity_str = ID_MATCH.replace_all(&entity_str, ""); let converted_str = ID_MATCH.replace_all(&converted_str, ""); assert_eq!(entity_str, converted_str, "normal entity test failed"); @@ -1137,7 +1132,9 @@ mod tests { fn can_recover_from_invalid_set_rules_dir() { use std::env; // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var - env::set_var("MathCATRulesDir", "MathCATRulesDir"); + unsafe { + env::set_var("MathCATRulesDir", "MathCATRulesDir"); + } assert!(set_rules_dir("someInvalidRulesDir").is_err()); assert!( set_rules_dir(super::super::abs_rules_dir_path()).is_ok(), diff --git a/src/lib.rs b/src/lib.rs index ac7db772..7397c4a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,9 +23,6 @@ pub mod errors { pub use anyhow::{anyhow, bail, Error, Result, Context}; } -#[macro_use] -extern crate lazy_static; - #[macro_use] extern crate bitflags; diff --git a/src/prefs.rs b/src/prefs.rs index 7443511e..39137d40 100644 --- a/src/prefs.rs +++ b/src/prefs.rs @@ -26,6 +26,7 @@ extern crate dirs; use std::cell::RefCell; use std::rc::Rc; use std::path::{Path, PathBuf}; +use std::sync::LazyLock; use crate::speech::{as_str_checked, RulesFor, FileAndTime}; use std::collections::{HashMap, HashSet}; use phf::phf_set; @@ -35,9 +36,7 @@ use crate::errors::*; /// Use to indicate preference not found with Preference::to_string() pub static NO_PREFERENCE: &str = "\u{FFFF}"; -lazy_static! 
{ - static ref DEFAULT_LANG: Yaml = Yaml::String("en".to_string()); -} +static DEFAULT_LANG: LazyLock = LazyLock::new(|| Yaml::String("en".to_string())); // Preferences are recorded here diff --git a/src/speech.rs b/src/speech.rs index 5a85ebe4..ba2b66a9 100644 --- a/src/speech.rs +++ b/src/speech.rs @@ -26,6 +26,7 @@ use std::rc::Rc; use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim}; use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR}; use regex::Regex; +use std::sync::LazyLock; pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND"; @@ -565,13 +566,13 @@ impl InsertChildren { } -lazy_static! { - static ref ATTR_NAME_VALUE: Regex = Regex::new( +static ATTR_NAME_VALUE: LazyLock = LazyLock::new(|| { + Regex::new( // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs) - // The quotes can be either single or double quotes + // The quotes can be either single or double quotes r#"(?P[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P[^']+)'|"(?P[^"]+)")"# - ).unwrap(); -} + ).unwrap() +}); // structure used when "intent:" is encountered in a rule // the name is either a string or an xpath that needs evaluation. 99% of the time it is a string diff --git a/src/tts.rs b/src/tts.rs index 11d52fe5..b8492128 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -78,6 +78,7 @@ use std::str::FromStr; use strum_macros::{Display, EnumString}; use regex::Regex; use sxd_xpath::Value; +use std::sync::LazyLock; const MIN_PAUSE:f64 = 50.0; // ms -- avoids clutter of putting out pauses that probably can't be heard const PAUSE_SHORT:f64 = 200.0; // ms @@ -595,9 +596,7 @@ impl TTS { /// The computation is based on the length of the speech strings (after removing tagging). /// There is a bias towards pausing more _after_ longer strings. 
pub fn compute_auto_pause(&self, prefs: &PreferenceManager, before: &str, after: &str) -> String { - lazy_static! { - static ref REMOVE_XML: Regex = Regex::new(r"<.+?>").unwrap(); // punctuation ending with a '.' - } + static REMOVE_XML: LazyLock = LazyLock::new(|| Regex::new(r"<.+?>").unwrap()); // punctuation ending with a '.' let before_len; let after_len; match self { @@ -653,10 +652,8 @@ impl TTS { fn merge_pauses_none(&self, str: &str) -> String { // punctuation used for pauses is ",", ";" - lazy_static! { - static ref SPACES: Regex = Regex::new(r"\s+([;,])").unwrap(); // two or more pauses - static ref MULTIPLE_PAUSES: Regex = Regex::new(r"([,;][,;]+)").unwrap(); // two or more pauses - } + static SPACES: LazyLock = LazyLock::new(|| Regex::new(r"\s+([;,])").unwrap()); // two or more pauses + static MULTIPLE_PAUSES: LazyLock = LazyLock::new(|| Regex::new(r"([,;][,;]+)").unwrap()); // two or more pauses // we reduce all sequences of two or more pauses to a single medium pause let merges_string = SPACES.replace_all(str, "$1").to_string(); let merges_string = MULTIPLE_PAUSES.replace_all(&merges_string, ";").to_string(); @@ -680,19 +677,15 @@ impl TTS { } fn merge_pauses_sapi5(&self, str: &str) -> String { - lazy_static! { - static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(]+?> *){2,}").unwrap(); // two or more pauses - static ref PAUSE_AMOUNT: Regex = Regex::new(r"msec=.*?(\d+)").unwrap(); // amount after 'time' - } + static CONSECUTIVE_BREAKS: LazyLock = LazyLock::new(|| Regex::new(r"(]+?> *){2,}").unwrap()); // two or more pauses + static PAUSE_AMOUNT: LazyLock = LazyLock::new(|| Regex::new(r"msec=.*?(\d+)").unwrap()); // amount after 'time' let replacement = |amount: usize| format!(""); return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement); } fn merge_pauses_ssml(&self, str: &str) -> String { - lazy_static! 
{ - static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(]+?> *){2,}").unwrap(); // two or more pauses - static ref PAUSE_AMOUNT: Regex = Regex::new(r"time=.*?(\d+)").unwrap(); // amount after 'time' - } + static CONSECUTIVE_BREAKS: LazyLock = LazyLock::new(|| Regex::new(r"(]+?> *){2,}").unwrap()); // two or more pauses + static PAUSE_AMOUNT: LazyLock = LazyLock::new(|| Regex::new(r"time=.*?(\d+)").unwrap()); // amount after 'time' let replacement = |amount: usize| format!(""); return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement); } diff --git a/src/xpath_functions.rs b/src/xpath_functions.rs index ae68edf3..a25c3fe8 100644 --- a/src/xpath_functions.rs +++ b/src/xpath_functions.rs @@ -24,6 +24,7 @@ use regex::Regex; use crate::pretty_print::mml_to_string; use std::cell::{Ref, RefCell}; use std::thread::LocalKey; +use std::sync::LazyLock; use phf::phf_set; use sxd_xpath::function::Error as XPathError; use crate::canonicalize::{as_element, name, get_parent, MATHML_FROM_NAME_ATTR}; @@ -265,9 +266,7 @@ impl IsNode { // Returns true if 'frac' is a common fraction // In this case, the numerator and denominator can be no larger than 'num_limit' and 'denom_limit' fn is_common_fraction(frac: Element, num_limit: usize, denom_limit: usize) -> bool { - lazy_static! { - static ref ALL_DIGITS: Regex = Regex::new(r"\d+").unwrap(); // match one or more digits - } + static ALL_DIGITS: LazyLock = LazyLock::new(|| Regex::new(r"\d+").unwrap()); // match one or more digits if !is_tag(frac, "mfrac") && !is_tag(frac, "fraction"){ return false; @@ -449,9 +448,7 @@ impl ToOrdinal { * Returns the string representation of that number or an error message */ fn convert(number: &str, fractional: bool, plural: bool) -> Option { - lazy_static! 
{ - static ref NO_DIGIT: Regex = Regex::new(r"[^\d]").unwrap(); // match anything except a digit - } + static NO_DIGIT: LazyLock = LazyLock::new(|| Regex::new(r"[^\d]").unwrap()); // match anything except a digit return SPEECH_DEFINITIONS.with(|definitions| { let definitions = definitions.borrow(); let numbers_large = definitions.get_vec("NumbersLarge")?; @@ -1349,10 +1346,8 @@ pub struct FontSizeGuess; // returns original node match isn't found impl FontSizeGuess { pub fn em_from_value(value_with_unit: &str) -> f64 { - lazy_static! { - // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?) - static ref FONT_VALUE: Regex = Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap(); - } + // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?) + static FONT_VALUE: LazyLock = LazyLock::new(|| Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap()); let cap = FONT_VALUE.captures(value_with_unit); if let Some(cap) = cap { if cap.len() == 3 { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 482e588a..d1ea70df 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -2,8 +2,7 @@ #[cfg(test)] use regex::Regex; -extern crate lazy_static; -use lazy_static::lazy_static; +use std::sync::LazyLock; pub use libmathcat::interface::*; @@ -35,9 +34,7 @@ pub fn abs_rules_dir_path() -> String { // Strip spaces from 'str' so comparison doesn't need to worry about spacing #[allow(dead_code)] // used in testing fn strip_spaces(str: &str) -> String { - lazy_static! 
{ - static ref SPACES: Regex = Regex::new(r" +").unwrap(); - } + static SPACES: LazyLock = LazyLock::new(|| Regex::new(r" +").unwrap()); return String::from(SPACES.replace_all(str, " ")); } From 4ad39df16f580c9829075e2a258443d98a64eaa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Sat, 10 Jan 2026 22:59:02 +0100 Subject: [PATCH 2/3] fix redundant `LazyLock` imports --- src/chemistry.rs | 1 - src/speech.rs | 1 - src/tts.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/src/chemistry.rs b/src/chemistry.rs index 9b0bd4ef..d9c703b1 100644 --- a/src/chemistry.rs +++ b/src/chemistry.rs @@ -47,7 +47,6 @@ use std::collections::HashSet; use std::cmp::Ordering; use std::sync::LazyLock; use crate::errors::*; -use std::sync::LazyLock; pub static NOT_CHEMISTRY: isize = -10000; // should overwhelm any positive signal diff --git a/src/speech.rs b/src/speech.rs index 994e80c4..3c03cc3d 100644 --- a/src/speech.rs +++ b/src/speech.rs @@ -27,7 +27,6 @@ use std::rc::Rc; use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim}; use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR}; use regex::Regex; -use std::sync::LazyLock; pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND"; diff --git a/src/tts.rs b/src/tts.rs index cd6a1dd0..82db48bd 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -79,7 +79,6 @@ use strum_macros::{Display, EnumString}; use regex::Regex; use std::sync::LazyLock; use sxd_xpath::Value; -use std::sync::LazyLock; const MIN_PAUSE:f64 = 50.0; // ms -- avoids clutter of putting out pauses that probably can't be heard const PAUSE_SHORT:f64 = 200.0; // ms From 1c866b4a66e20983b21fb37cec6f493255dcdda2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Sat, 10 Jan 2026 23:00:42 +0100 Subject: [PATCH 3/3] . 
--- src/chemistry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chemistry.rs b/src/chemistry.rs index d9c703b1..d1d49a19 100644 --- a/src/chemistry.rs +++ b/src/chemistry.rs @@ -45,8 +45,8 @@ use phf::{phf_map, phf_set}; use std::convert::TryInto; use std::collections::HashSet; use std::cmp::Ordering; -use std::sync::LazyLock; use crate::errors::*; +use std::sync::LazyLock; pub static NOT_CHEMISTRY: isize = -10000; // should overwhelm any positive signal