From 5bfea6fdfd7257b84fb8e7b613e1df520ea1fbe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 8 Jan 2026 07:53:32 +0100 Subject: [PATCH 1/2] use LazyLock in prefs.rs --- src/prefs.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/prefs.rs b/src/prefs.rs index f73cf89f..d28466a5 100644 --- a/src/prefs.rs +++ b/src/prefs.rs @@ -26,6 +26,7 @@ extern crate dirs; use std::cell::RefCell; use std::rc::Rc; use std::path::{Path, PathBuf}; +use std::sync::LazyLock; use crate::speech::{as_str_checked, RulesFor, FileAndTime}; use std::collections::{HashMap, HashSet}; use phf::phf_set; @@ -35,9 +36,7 @@ use crate::errors::*; /// Use to indicate preference not found with Preference::to_string() pub static NO_PREFERENCE: &str = "\u{FFFF}"; -lazy_static! { - static ref DEFAULT_LANG: Yaml = Yaml::String("en".to_string()); -} +static DEFAULT_LANG: LazyLock<Yaml> = LazyLock::new(|| Yaml::String("en".to_string())); // Preferences are recorded here From 2f10adff4dbebad0176b5d795aff14165469ea31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 8 Jan 2026 08:51:41 +0100 Subject: [PATCH 2/2] change more parts --- src/chemistry.rs | 15 ++++++--------- src/infer_intent.rs | 36 +++++++++++++++++++----------------- src/interface.rs | 23 +++++++++-------------- src/speech.rs | 11 ++++++----- src/tts.rs | 23 ++++++++--------------- src/xpath_functions.rs | 15 +++++---------- tests/common/mod.rs | 7 ++----- 7 files changed, 55 insertions(+), 75 deletions(-) diff --git a/src/chemistry.rs b/src/chemistry.rs index 8d2f625d..1d94fe32 100644 --- a/src/chemistry.rs +++ b/src/chemistry.rs @@ -46,6 +46,7 @@ use std::convert::TryInto; use std::collections::HashSet; use std::cmp::Ordering; use crate::errors::*; +use std::sync::LazyLock; pub static NOT_CHEMISTRY: isize = -10000; // should overwhelm any positive signal @@ -939,10 +940,8 @@ fn likely_chem_superscript(sup: Element) -> isize { // bullet is radical 
(en.wikipedia.org/wiki/Radical_(chemistry)#Depiction_in_chemical_reactions); mhchem uses dot operator // these can stand alone, be followed by +/- or have a number in front "(2•)-"" [examples from mhchem documentation] // roman numerals are "oxidation state" and range from -4 to +9 - lazy_static! { - static ref MULTIPLE_PLUS_OR_MINUS_OR_DOT: Regex = Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap(); - static ref SINGLE_PLUS_OR_MINUS_OR_DOT: Regex = Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap(); - } + static MULTIPLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap()); + static SINGLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap()); static DOTS: &[char; 3] = &['⋅', '∙', '•']; let sup_name = name(sup); if sup_name == "mo" && MULTIPLE_PLUS_OR_MINUS_OR_DOT.is_match(as_text(sup)) { @@ -1617,11 +1616,9 @@ fn is_equilibrium_constant(mut mathml: Element) -> bool { return name(mathml) == "mi" && as_text(mathml) == "K"; } -lazy_static! { - // Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals. - // All instances seem to be upper case that I've seen. - static ref SMALL_UPPER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap(); -} +// Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals. +// All instances seem to be upper case that I've seen. 
+static SMALL_UPPER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap()); /// look for "(s), "(l)", "(g)", "(aq)" (could also use [...]) /// this might be called before canonicalization, but in clean_chemistry_mrow, we made sure "( xxx )" is grouped properly diff --git a/src/infer_intent.rs b/src/infer_intent.rs index 7da65768..4c92e7a0 100644 --- a/src/infer_intent.rs +++ b/src/infer_intent.rs @@ -11,6 +11,7 @@ use crate::speech::SpeechRulesWithContext; use crate::canonicalize::{as_element, as_text, name, create_mathml_element, set_mathml_name, INTENT_ATTR, MATHML_FROM_NAME_ATTR}; use crate::errors::*; use std::fmt; +use std::sync::LazyLock; use crate::pretty_print::mml_to_string; use crate::xpath_functions::is_leaf; use regex::Regex; @@ -243,23 +244,24 @@ pub fn intent_speech_for_name(intent_name: &str, verbosity: &str, fixity: &str) // property := S ':' NCName // S := [ \t\n\r]* -lazy_static! { - // The practical restrictions of NCName are that it cannot contain several symbol characters like - // !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters - // Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName. 
- // NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated - // We follow NC_NAME for the basic latin block, but then allow everything - static ref CONCEPT_OR_LITERAL: Regex = Regex::new( - r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // NC_NAME but simpler - ).unwrap(); - static ref PROPERTY: Regex = Regex::new( - r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // : NC_NAME - ).unwrap(); - static ref ARG_REF: Regex = Regex::new( - r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // $ NC_NAME - ).unwrap(); - static ref NUMBER: Regex = Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap(); -} +// The practical restrictions of NCName are that it cannot contain several symbol characters like +// !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters +// Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName. 
+// NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated +// We follow NC_NAME for the basic latin block, but then allow everything +static CONCEPT_OR_LITERAL: LazyLock = LazyLock::new(|| { + Regex::new(r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // NC_NAME but simpler + ).unwrap() +}); +static PROPERTY: LazyLock = LazyLock::new(|| { + Regex::new(r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // : NC_NAME + ).unwrap() +}); +static ARG_REF: LazyLock = LazyLock::new(|| { + Regex::new(r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"# // $ NC_NAME + ).unwrap() +}); +static NUMBER: LazyLock = LazyLock::new(|| Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap()); static TERMINALS_AS_U8: [u8; 3] = [b'(', b',', b')']; // static TERMINALS: [char; 3] = ['(', ',',')']; diff --git a/src/interface.rs b/src/interface.rs index 665e1e0d..6b858e7f 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -3,6 +3,7 @@ #![allow(non_snake_case)] #![allow(clippy::needless_return)] use std::cell::RefCell; +use std::sync::LazyLock; use crate::canonicalize::{as_text, create_mathml_element}; use crate::errors::*; @@ -94,14 +95,12 @@ pub fn get_version() -> String { /// The ids can be used for sync highlighting if the `Bookmark` API preference is true. pub fn set_mathml(mathml_str: String) -> Result { enable_logs(); - lazy_static! 
{ - // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822) - static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap(); - static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap(); - static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap(); // very limited namespace prefix match - static ref PREFIX: Regex = Regex::new(r#"( = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap()); + static MATHJAX_V3: LazyLock = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap()); + static NAMESPACE_DECL: LazyLock = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap()); // very limited namespace prefix match + static PREFIX: LazyLock = LazyLock::new(|| Regex::new(r#"( = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]+?);"#).unwrap()); NAVIGATION_STATE.with(|nav_stack| { nav_stack.borrow_mut().reset(); @@ -670,9 +669,7 @@ pub fn trim_element(e: Element, allow_structure_in_leaves: bool) { // space, tab, newline, carriage return all get collapsed to a single space const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}']; - lazy_static! { - static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap(); - } + static WHITESPACE_MATCH: LazyLock = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap()); if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) { // Assume it is HTML inside of the leaf -- turn the HTML into a string @@ -1108,9 +1105,7 @@ mod tests { set_mathml("𝕞".to_string()).unwrap(); // need to remove unique ids - lazy_static! { - static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap(); - } + static ID_MATCH: LazyLock = LazyLock::new(|| Regex::new(r#"id='.+?' 
"#).unwrap()); let entity_str = ID_MATCH.replace_all(&entity_str, ""); let converted_str = ID_MATCH.replace_all(&converted_str, ""); assert_eq!(entity_str, converted_str, "normal entity test failed"); diff --git a/src/speech.rs b/src/speech.rs index 091528db..d36c3857 100644 --- a/src/speech.rs +++ b/src/speech.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use std::collections::HashMap; use std::cell::{RefCell, RefMut}; +use std::sync::LazyLock; use sxd_document::dom::{ChildOfElement, Document, Element}; use sxd_document::{Package, QName}; use sxd_xpath::context::Evaluation; @@ -565,13 +566,13 @@ impl InsertChildren { } -lazy_static! { - static ref ATTR_NAME_VALUE: Regex = Regex::new( +static ATTR_NAME_VALUE: LazyLock = LazyLock::new(|| { + Regex::new( // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs) - // The quotes can be either single or double quotes + // The quotes can be either single or double quotes r#"(?P[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P[^']+)'|"(?P[^"]+)")"# - ).unwrap(); -} + ).unwrap() +}); // structure used when "intent:" is encountered in a rule // the name is either a string or an xpath that needs evaluation. 99% of the time it is a string diff --git a/src/tts.rs b/src/tts.rs index 11d52fe5..fcdfc468 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -77,6 +77,7 @@ use std::string::ToString; use std::str::FromStr; use strum_macros::{Display, EnumString}; use regex::Regex; +use std::sync::LazyLock; use sxd_xpath::Value; const MIN_PAUSE:f64 = 50.0; // ms -- avoids clutter of putting out pauses that probably can't be heard @@ -595,9 +596,7 @@ impl TTS { /// The computation is based on the length of the speech strings (after removing tagging). /// There is a bias towards pausing more _after_ longer strings. pub fn compute_auto_pause(&self, prefs: &PreferenceManager, before: &str, after: &str) -> String { - lazy_static! 
{ - static ref REMOVE_XML: Regex = Regex::new(r"<.+?>").unwrap(); // punctuation ending with a '.' - } + static REMOVE_XML: LazyLock = LazyLock::new(|| Regex::new(r"<.+?>").unwrap()); // punctuation ending with a '.' let before_len; let after_len; match self { @@ -653,10 +652,8 @@ impl TTS { fn merge_pauses_none(&self, str: &str) -> String { // punctuation used for pauses is ",", ";" - lazy_static! { - static ref SPACES: Regex = Regex::new(r"\s+([;,])").unwrap(); // two or more pauses - static ref MULTIPLE_PAUSES: Regex = Regex::new(r"([,;][,;]+)").unwrap(); // two or more pauses - } + static SPACES: LazyLock = LazyLock::new(|| Regex::new(r"\s+([;,])").unwrap()); // two or more pauses + static MULTIPLE_PAUSES: LazyLock = LazyLock::new(|| Regex::new(r"([,;][,;]+)").unwrap()); // two or more pauses // we reduce all sequences of two or more pauses to a single medium pause let merges_string = SPACES.replace_all(str, "$1").to_string(); let merges_string = MULTIPLE_PAUSES.replace_all(&merges_string, ";").to_string(); @@ -680,19 +677,15 @@ impl TTS { } fn merge_pauses_sapi5(&self, str: &str) -> String { - lazy_static! { - static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(]+?> *){2,}").unwrap(); // two or more pauses - static ref PAUSE_AMOUNT: Regex = Regex::new(r"msec=.*?(\d+)").unwrap(); // amount after 'time' - } + static CONSECUTIVE_BREAKS: LazyLock = LazyLock::new(|| Regex::new(r"(]+?> *){2,}").unwrap()); // two or more pauses + static PAUSE_AMOUNT: LazyLock = LazyLock::new(|| Regex::new(r"msec=.*?(\d+)").unwrap()); // amount after 'time' let replacement = |amount: usize| format!(""); return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement); } fn merge_pauses_ssml(&self, str: &str) -> String { - lazy_static! 
{ - static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(]+?> *){2,}").unwrap(); // two or more pauses - static ref PAUSE_AMOUNT: Regex = Regex::new(r"time=.*?(\d+)").unwrap(); // amount after 'time' - } + static CONSECUTIVE_BREAKS: LazyLock = LazyLock::new(|| Regex::new(r"(]+?> *){2,}").unwrap()); // two or more pauses + static PAUSE_AMOUNT: LazyLock = LazyLock::new(|| Regex::new(r"time=.*?(\d+)").unwrap()); // amount after 'time' let replacement = |amount: usize| format!(""); return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement); } diff --git a/src/xpath_functions.rs b/src/xpath_functions.rs index ae68edf3..49543688 100644 --- a/src/xpath_functions.rs +++ b/src/xpath_functions.rs @@ -23,6 +23,7 @@ use crate::definitions::{Definitions, SPEECH_DEFINITIONS, BRAILLE_DEFINITIONS}; use regex::Regex; use crate::pretty_print::mml_to_string; use std::cell::{Ref, RefCell}; +use std::sync::LazyLock; use std::thread::LocalKey; use phf::phf_set; use sxd_xpath::function::Error as XPathError; @@ -265,9 +266,7 @@ impl IsNode { // Returns true if 'frac' is a common fraction // In this case, the numerator and denominator can be no larger than 'num_limit' and 'denom_limit' fn is_common_fraction(frac: Element, num_limit: usize, denom_limit: usize) -> bool { - lazy_static! { - static ref ALL_DIGITS: Regex = Regex::new(r"\d+").unwrap(); // match one or more digits - } + static ALL_DIGITS: LazyLock = LazyLock::new(|| Regex::new(r"\d+").unwrap()); // match one or more digits if !is_tag(frac, "mfrac") && !is_tag(frac, "fraction"){ return false; @@ -449,9 +448,7 @@ impl ToOrdinal { * Returns the string representation of that number or an error message */ fn convert(number: &str, fractional: bool, plural: bool) -> Option { - lazy_static! 
{ - static ref NO_DIGIT: Regex = Regex::new(r"[^\d]").unwrap(); // match anything except a digit - } + static NO_DIGIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^\d]").unwrap()); // match anything except a digit return SPEECH_DEFINITIONS.with(|definitions| { let definitions = definitions.borrow(); let numbers_large = definitions.get_vec("NumbersLarge")?; @@ -1349,10 +1346,8 @@ pub struct FontSizeGuess; // returns original node match isn't found impl FontSizeGuess { pub fn em_from_value(value_with_unit: &str) -> f64 { - lazy_static! { - // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?) - static ref FONT_VALUE: Regex = Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap(); - } + // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?) + static FONT_VALUE: LazyLock<Regex> = LazyLock::new(|| { Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap() }); let cap = FONT_VALUE.captures(value_with_unit); if let Some(cap) = cap { if cap.len() == 3 { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 736e2b84..9faf074e 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -2,8 +2,7 @@ #[cfg(test)] use regex::Regex; -extern crate lazy_static; -use lazy_static::lazy_static; +use std::sync::LazyLock; pub use libmathcat::interface::*; @@ -35,9 +34,7 @@ pub fn abs_rules_dir_path() -> String { // Strip spaces from 'str' so comparison doesn't need to worry about spacing #[allow(dead_code)] // used in testing fn strip_spaces(str: &str) -> String { - lazy_static! { - static ref SPACES: Regex = Regex::new(r" +").unwrap(); - } + static SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" +").unwrap()); return String::from(SPACES.replace_all(str, " ")); }