diff --git a/Cargo.toml b/Cargo.toml index bf013e23..63f6a518 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ sxd-document = "0.3" sxd-xpath = "0.4" yaml-rust = "0.4" # yaml-rust = { version = "0.11", package = "yaml-rust2" } -lazy_static = "1.4" strum = "0.27.2" strum_macros = "0.27.2" anyhow = "1.0" diff --git a/src/braille.rs b/src/braille.rs index 34a65a9c..b7f9d92f 100644 --- a/src/braille.rs +++ b/src/braille.rs @@ -13,6 +13,7 @@ use crate::speech::{BRAILLE_RULES, SpeechRulesWithContext, braille_replace_chars use crate::canonicalize::get_parent; use std::borrow::Cow; use std::ops::Range; +use std::sync::LazyLock; static UEB_PREFIXES: phf::Set = phf_set! { '⠼', '⠈', '⠘', '⠸', '⠐', '⠨', '⠰', '⠠', @@ -542,106 +543,99 @@ fn nemeth_cleanup(pref_manager: Ref, raw_braille: String) -> "↓" => "⠰", // subscript }; - lazy_static! { - // Add an English Letter indicator. This involves finding "single letters". - // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation: - // punctuation or whitespace on the left and right ignoring open/close chars - // https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf - static ref ADD_ENGLISH_LETTER_INDICATOR: Regex = - Regex::new(r"(?P^|W|P.[\u2800-\u28FF]?|,)(?P[\u2800-\u28FF]?⠷)?(?PC?L.)(?P[\u2800-\u28FF]?⠾)?(?PW|P|,|$)").unwrap(); + // Add an English Letter indicator. This involves finding "single letters". 
+ // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation: + // punctuation or whitespace on the left and right ignoring open/close chars + // https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf + static ADD_ENGLISH_LETTER_INDICATOR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?P^|W|P.[\u2800-\u28FF]?|,)(?P[\u2800-\u28FF]?⠷)?(?PC?L.)(?P[\u2800-\u28FF]?⠾)?(?PW|P|,|$)").unwrap() + }); - // Trim braille spaces before and after braille indicators - // In order: fraction, /, cancellation, letter, baseline - // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" - static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = - Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap(); - static ref REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: Regex = - Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap(); - - // Hack to convert non-numeric '.' to numeric '.' - // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2. - static ref DOTS_99_A_2: Regex = Regex::new(r"𝑁⠨mN").unwrap(); - - // Punctuation is one or two chars. 
There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below - static ref REMOVE_SPACE_BEFORE_PUNCTUATION_151: Regex = - Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap(); - static ref REMOVE_SPACE_AFTER_PUNCTUATION_151: Regex = - Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap(); - - // Multipurpose indicator insertion - // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex) - - // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N - static ref MULTI_177_2: Regex = - Regex::new(r"([Ll].)[N𝑁]").unwrap(); - - // keep between numeric subscript and digit ('M' added by subscript rule) - static ref MULTI_177_3: Regex = - Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap(); - - // Add after decimal pt for non-digits except for comma and punctuation - // Note: since "." can be in the middle of a number, there is not necessarily a "N" - // Although not mentioned in 177_5, don't add an 'M' before an 'm' - static ref MULTI_177_5: Regex = - Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap(); - - - // Pattern for rule II.9a (add numeric indicator at start of line or after a space) - // 1. start of line - // 2. optional minus sign (⠤) - // 3. optional typeface indicator - // 4. 
number (N) - static ref NUM_IND_9A: Regex = - Regex::new(r"(?P^|[,Ww])(?P⠤?)N").unwrap(); - - // Needed after section mark(§), paragraph mark(¶), #, or * - static ref NUM_IND_9C: Regex = - Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap(); - - // Needed after section mark(§), paragraph mark(¶), #, or * - static ref NUM_IND_9D: Regex = - Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap(); - - // Needed after a typeface change or interior shape modifier indicator - static ref NUM_IND_9E: Regex = Regex::new(r"(?P[SB𝔹TIR]+?)N").unwrap(); - static ref NUM_IND_9E_SHAPE: Regex = Regex::new(r"(?P⠸⠫)N").unwrap(); - - // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d) - // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used - static ref NUM_IND_9F: Regex = Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap(); - - // Enclosed list exception - // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars). - // The green book says "at the start" of an item, don't add the numeric indicator. 
- // The NFB list exceptions after function abbreviations and angles, but what this really means is "after a space" - static ref NUM_IND_ENCLOSED_LIST: Regex = Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap(); - - // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…") - // Never use punctuation indicator before these (38-6) - // "…": "⠀⠄⠄⠄" - // "-": "⠸⠤" (hyphen and dash) - // ",": "⠠⠀" -- spacing already added - // Rule II.9b (add numeric indicator after punctuation [optional minus[optional .][digit] - // because this is run after the above rule, some cases are already caught, so don't - // match if there is already a numeric indicator - static ref NUM_IND_9B: Regex = Regex::new(r"(?PP..?)(?P⠤?)N").unwrap(); - - // Before 79b (punctuation) - static ref REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: Regex = Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap(); - - // Most commas have a space after them, but not when followed by a close quote (others?) - static ref NO_SPACE_AFTER_COMMA: Regex = Regex::new(r",P⠴").unwrap(); // captures both single and double close quote - static ref REMOVE_LEVEL_IND_BEFORE_BASELINE: Regex = Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap(); - - // Except for the four chars above, the unicode rules always include a punctuation indicator. 
- // The cases to remove them (that seem relevant to MathML) are: - // Beginning of line or after a space (V 38.1) - // After a word (38.4) - // 2nd or subsequent punctuation (includes, "-", etc) (38.7) - static ref REMOVE_AFTER_PUNCT_IND: Regex = Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap(); - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r"⠀⠀+").unwrap(); - } + // Trim braille spaces before and after braille indicators + // In order: fraction, /, cancellation, letter, baseline + // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" + static REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: LazyLock = + LazyLock::new(|| Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap()); + static REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: LazyLock = + LazyLock::new(|| Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap()); + + // Hack to convert non-numeric '.' to numeric '.' + // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2. + static DOTS_99_A_2: LazyLock = LazyLock::new(|| Regex::new(r"𝑁⠨mN").unwrap()); + + // Punctuation is one or two chars. 
There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below + static REMOVE_SPACE_BEFORE_PUNCTUATION_151: LazyLock = + LazyLock::new(|| Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap()); + static REMOVE_SPACE_AFTER_PUNCTUATION_151: LazyLock = + LazyLock::new(|| Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap()); + + // Multipurpose indicator insertion + // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex) + + // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N + static MULTI_177_2: LazyLock = LazyLock::new(|| Regex::new(r"([Ll].)[N𝑁]").unwrap()); + + // keep between numeric subscript and digit ('M' added by subscript rule) + static MULTI_177_3: LazyLock = LazyLock::new(|| Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap()); + + // Add after decimal pt for non-digits except for comma and punctuation + // Note: since "." can be in the middle of a number, there is not necessarily a "N" + // Although not mentioned in 177_5, don't add an 'M' before an 'm' + static MULTI_177_5: LazyLock = + LazyLock::new(|| Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap()); + + // Pattern for rule II.9a (add numeric indicator at start of line or after a space) + // 1. start of line + // 2. optional minus sign (⠤) + // 3. optional typeface indicator + // 4. 
number (N) + static NUM_IND_9A: LazyLock = LazyLock::new(|| Regex::new(r"(?P^|[,Ww])(?P⠤?)N").unwrap()); + + // Needed after section mark(§), paragraph mark(¶), #, or * + static NUM_IND_9C: LazyLock = LazyLock::new(|| Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap()); + + // Needed after section mark(§), paragraph mark(¶), #, or * + static NUM_IND_9D: LazyLock = LazyLock::new(|| Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap()); + + // Needed after a typeface change or interior shape modifier indicator + static NUM_IND_9E: LazyLock = LazyLock::new(|| Regex::new(r"(?P[SB𝔹TIR]+?)N").unwrap()); + static NUM_IND_9E_SHAPE: LazyLock = LazyLock::new(|| Regex::new(r"(?P⠸⠫)N").unwrap()); + + // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d) + // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used + static NUM_IND_9F: LazyLock = LazyLock::new(|| Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap()); + + // Enclosed list exception + // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars). + // The green book says "at the start" of an item, don't add the numeric indicator. 
+ // The NFB lists exceptions after function abbreviations and angles, but what this really means is "after a space" + static NUM_IND_ENCLOSED_LIST: LazyLock = LazyLock::new(|| Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap()); + + // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…") + // Never use punctuation indicator before these (38-6) + // "…": "⠀⠄⠄⠄" + // "-": "⠸⠤" (hyphen and dash) + // ",": "⠠⠀" -- spacing already added + // Rule II.9b (add numeric indicator after punctuation [optional minus][optional .][digit]) + // because this is run after the above rule, some cases are already caught, so don't + // match if there is already a numeric indicator + static NUM_IND_9B: LazyLock = LazyLock::new(|| Regex::new(r"(?PP..?)(?P⠤?)N").unwrap()); + + // Before 79b (punctuation) + static REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: LazyLock = LazyLock::new(|| Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap()); + + // Most commas have a space after them, but not when followed by a close quote (others?) + static NO_SPACE_AFTER_COMMA: LazyLock = LazyLock::new(|| Regex::new(r",P⠴").unwrap()); // captures both single and double close quote + static REMOVE_LEVEL_IND_BEFORE_BASELINE: LazyLock = LazyLock::new(|| Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap()); + + // Except for the four chars above, the unicode rules always include a punctuation indicator. 
+ // The cases to remove them (that seem relevant to MathML) are: + // Beginning of line or after a space (V 38.1) + // After a word (38.4) + // 2nd or subsequent punctuation (includes, "-", etc) (38.7) + static REMOVE_AFTER_PUNCT_IND: LazyLock = LazyLock::new(|| Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap()); + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap()); + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap()); // debug!("Before: \"{}\"", raw_braille); // replacements might overlap at boundaries (e.g., whitespace) -- need to repeat @@ -845,15 +839,13 @@ static LETTER_PREFIXES: phf::Set = phf_set! { 'B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', '𝑐', }; -lazy_static! { - // Trim braille spaces before and after braille indicators - // In order: fraction, /, cancellation, letter, baseline - // Note: fraction over is not listed due to example 42(4) which shows a space before the "/" - // static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = - // Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap(); - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb])").unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r"⠀⠀+").unwrap(); -} +// Trim braille spaces before and after braille indicators +// In order: fraction, /, cancellation, letter, baseline +// Note: fraction over is not listed due to example 42(4) which shows a space before the "/" +// static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex = +// Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap(); +static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb])").unwrap()); +static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap()); fn is_short_form(chars: &[char]) -> bool { let chars_as_string = chars.iter().map(|ch| ch.to_string()).collect::(); @@ -1111,9 +1103,7 @@ fn ueb_cleanup(pref_manager: Ref, raw_braille: String) -> Str 
} fn typeface_to_word_mode(braille: &str) -> String { - lazy_static! { - static ref HAS_TYPEFACE: Regex = Regex::new("[BI𝔹STD]").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new("[BI𝔹STD]").unwrap()); // debug!("before typeface fix: '{}'", braille); let mut result = "".to_string(); @@ -1711,8 +1701,7 @@ fn handle_contractions(chars: &[char], mut result: String) -> String { } // It would be much better from an extensibility point of view to read the table in from a file - lazy_static! { - static ref CONTRACTIONS: Vec = vec![ + static CONTRACTIONS: LazyLock> = LazyLock::new(|| { vec![ // 10.3: Strong contractions Replacement{ pattern: to_unicode_braille("and"), replacement: "L⠯"}, Replacement{ pattern: to_unicode_braille("for"), replacement: "L⠿"}, @@ -1753,11 +1742,11 @@ fn handle_contractions(chars: &[char], mut result: String) -> String { Replacement{ pattern: to_unicode_braille("en"), replacement: "⠢"}, Replacement{ pattern: to_unicode_braille("in"), replacement: "⠔"}, - ]; + ] + }); - static ref CONTRACTION_PATTERNS: RegexSet = init_patterns(&CONTRACTIONS); - static ref CONTRACTION_REGEX: Vec = init_regex(&CONTRACTIONS); - } + static CONTRACTION_PATTERNS: LazyLock = LazyLock::new(|| init_patterns(&CONTRACTIONS)); + static CONTRACTION_REGEX: LazyLock> = LazyLock::new(|| init_regex(&CONTRACTIONS)); let mut chars_as_str = chars.iter().collect::(); // debug!(" handle_contractions: examine '{}'", &chars_as_str); @@ -1832,20 +1821,17 @@ static VIETNAM_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { }; fn vietnam_cleanup(pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - // Deal with Vietnamese "rhymes" -- moving accents around - // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement - // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules - // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" is dropped later - static ref QU_GI_RULE_EXCEPTION: Regex = Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap(); - static ref IUOY_E_RULE: Regex = Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap(); // ie, ue, oe, and ye rule - static ref UO_A_RULE: Regex = Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap(); // ua, oa rule - static ref UU_O_RULE: Regex = Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap(); // uo, ưo rule - static ref UYE_RULE: Regex = Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap(); // uo, ưo rule - static ref UY_RULE: Regex = Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap(); // uo, ưo rule - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap(); - - } + // Deal with Vietnamese "rhymes" -- moving accents around + // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement + // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules + // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" 
is dropped later + static QU_GI_RULE_EXCEPTION: LazyLock = LazyLock::new(|| Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap()); + static IUOY_E_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap()); // ie, ue, oe, and ye rule + static UO_A_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap()); // ua, oa rule + static UU_O_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap()); // uo, ưo rule + static UYE_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap()); // uyê rule + static UY_RULE: LazyLock = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap()); // uy rule + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap()); // NOTE(review): inside the character class, ".-—" parses as a range from '.' to '—' rather than three literal chars -- confirm this is intended, otherwise escape the '-' // debug!("vietnam_cleanup: start={}", raw_braille); let result = typeface_to_word_mode(&raw_braille); let result = capitals_to_word_mode(&result); @@ -1940,9 +1926,7 @@ static CMU_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { fn cmu_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! { - static ref ADD_WHITE_SPACE: Regex = Regex::new(r"𝘄(.)|𝘄$").unwrap(); - } + static ADD_WHITE_SPACE: LazyLock = LazyLock::new(|| Regex::new(r"𝘄(.)|𝘄$").unwrap()); // debug!("cmu_cleanup: start={}", raw_braille); // let result = typeface_to_word_mode(&raw_braille); @@ -2076,12 +2060,10 @@ static FINNISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! { }; fn finnish_cleanup(pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - static ref REPLACE_INDICATORS: Regex =Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap(); - // Numbers need to end with a space, but sometimes there is one there for other reasons - static ref DROP_NUMBER_SEPARATOR: Regex = Regex::new(r"(n.)\)").unwrap(); - static ref NUMBER_MATCH: Regex = Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap(); - } + static REPLACE_INDICATORS: LazyLock = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap()); + // Numbers need to end with a space, but sometimes there is one there for other reasons + static DROP_NUMBER_SEPARATOR: LazyLock = LazyLock::new(|| Regex::new(r"(n.)\)").unwrap()); + static NUMBER_MATCH: LazyLock = LazyLock::new(|| Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap()); // debug!("finnish_cleanup: start={}", raw_braille); let result = DROP_NUMBER_SEPARATOR.replace_all(&raw_braille, |cap: &Captures| { @@ -2140,10 +2122,8 @@ fn finnish_cleanup(pref_manager: Ref, raw_braille: String) -> fn swedish_cleanup(pref_manager: Ref, raw_braille: String) -> String { // FIX: need to implement this -- this is just a copy of the Vietnam code - lazy_static! { - // Empty bases are ok if they follow whitespace - static ref EMPTY_BASE: Regex = Regex::new(r"(^|[W𝐖w])E").unwrap(); - } + // Empty bases are ok if they follow whitespace + static EMPTY_BASE: LazyLock = LazyLock::new(|| Regex::new(r"(^|[W𝐖w])E").unwrap()); // debug!("swedish_cleanup: start={}", raw_braille); let result = typeface_to_word_mode(&raw_braille); let result = capitals_to_word_mode(&result); @@ -2189,10 +2169,8 @@ fn swedish_cleanup(pref_manager: Ref, raw_braille: String) -> #[allow(non_snake_case)] fn LaTeX_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! 
{ - static ref REMOVE_SPACE: Regex =Regex::new(r" ([\^_,;)\]}])").unwrap(); // '^', '_', ',', ';', ')', ']', '}' - static ref COLLAPSE_SPACES: Regex = Regex::new(r" +").unwrap(); - } + static REMOVE_SPACE: LazyLock = LazyLock::new(|| Regex::new(r" ([\^_,;)\]}])").unwrap()); // '^', '_', ',', ';', ')', ']', '}' + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r" +").unwrap()); // debug!("LaTeX_cleanup: start={}", raw_braille); let result = raw_braille.replace('𝐖', " "); // let result = COLLAPSE_SPACES.replace_all(&raw_braille, "⠀"); @@ -2208,11 +2186,9 @@ fn LaTeX_cleanup(_pref_manager: Ref, raw_braille: String) -> #[allow(non_snake_case)] fn ASCIIMath_cleanup(_pref_manager: Ref, raw_braille: String) -> String { - lazy_static! { - static ref REMOVE_SPACE_BEFORE_OP: Regex = Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap(); - static ref REMOVE_SPACE_AFTER_OP: Regex = Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap(); - static ref COLLAPSE_SPACES: Regex = Regex::new(r" +").unwrap(); - } + static REMOVE_SPACE_BEFORE_OP: LazyLock = LazyLock::new(|| Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap()); + static REMOVE_SPACE_AFTER_OP: LazyLock = LazyLock::new(|| Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap()); + static COLLAPSE_SPACES: LazyLock = LazyLock::new(|| Regex::new(r" +").unwrap()); // debug!("ASCIIMath_cleanup: start={}", raw_braille); let result = raw_braille.replace("|𝐖__|", "|𝐰__|"); // protect the whitespace to prevent misinterpretation as lfloor let result = result.replace('𝐖', " "); @@ -2363,14 +2339,13 @@ impl BrailleChars { } fn get_braille_nemeth_chars(node: Element, text_range: Option>) -> Result { - lazy_static! 
{ - // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them: - // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman - // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian - // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?P[SB𝔹TIR]*)(?P[EDGVHU]?)(?PC?)(?PL?)(?P[N]?)(?P.)").unwrap(); - } + // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them: + // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman + // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian + // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?P[SB𝔹TIR]*)(?P[EDGVHU]?)(?PC?)(?PL?)(?P[N]?)(?P.)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); // FIX: cover all the options -- use phf::Map let attr_typeface = match math_variant { @@ -2442,11 +2417,10 @@ impl BrailleChars { // this routine merely deals with the mathvariant attr. // Canonicalize has already transformed all chars it can to math alphanumerics, but not all have bold/italic // The typeform/caps transforms to (potentially) word mode are handled later. - lazy_static! 
{ - static ref HAS_TYPEFACE: Regex = Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap(); - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap()); + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); let text = BrailleChars::substring(as_text(node), &text_range); @@ -2496,11 +2470,10 @@ impl BrailleChars { // In CMU, we need to replace spaces used for number blocks with "." // For other numbers, we need to add "." to create digit blocks - lazy_static! { - static ref HAS_TYPEFACE: Regex = Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap(); - static ref PICK_APART_CHAR: Regex = - Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap(); - } + static HAS_TYPEFACE: LazyLock = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap()); + static PICK_APART_CHAR: LazyLock = LazyLock::new(|| { + Regex::new(r"(?PB??)(?PI??)(?P[S𝔹TD]??)s??(?PC??)(?PG??)(?P[NL].)").unwrap() + }); let math_variant = node.attribute_value("mathvariant"); let text = BrailleChars::substring(as_text(node), &text_range); diff --git a/src/canonicalize.rs b/src/canonicalize.rs index df52f56c..12e69e98 100644 --- a/src/canonicalize.rs +++ b/src/canonicalize.rs @@ -20,6 +20,7 @@ use std::fmt; use crate::chemistry::*; use unicode_script::Script; use roman_numerals_rs::RomanNumeral; +use std::sync::LazyLock; // FIX: DECIMAL_SEPARATOR should be set by env, or maybe language const DECIMAL_SEPARATOR: &str = "."; @@ -48,60 +49,58 @@ static AMBIGUOUS_OPERATORS: phf::Set<&str> = phf_set! 
{ }; // static vars used when canonicalizing -lazy_static!{ - // lowest priority operator so it is never popped off the stack - static ref LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None }; +// lowest priority operator so it is never popped off the stack +static LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None }; - static ref INVISIBLE_FUNCTION_APPLICATION: &'static OperatorInfo = OPERATORS.get("\u{2061}").unwrap(); - static ref IMPLIED_TIMES: &'static OperatorInfo = OPERATORS.get("\u{2062}").unwrap(); - static ref IMPLIED_INVISIBLE_COMMA: &'static OperatorInfo = OPERATORS.get("\u{2063}").unwrap(); - static ref IMPLIED_INVISIBLE_PLUS: &'static OperatorInfo = OPERATORS.get("\u{2064}").unwrap(); +static INVISIBLE_FUNCTION_APPLICATION: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2061}").unwrap()); +static IMPLIED_TIMES: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2062}").unwrap()); +static IMPLIED_INVISIBLE_COMMA: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2063}").unwrap()); +static IMPLIED_INVISIBLE_PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2064}").unwrap()); - // FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)? - static ref PLUS: &'static OperatorInfo = OPERATORS.get("+").unwrap(); - static ref MINUS: &'static OperatorInfo = OPERATORS.get("-").unwrap(); - static ref PREFIX_MINUS: &'static OperatorInfo = MINUS.next.as_ref().unwrap(); +// FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)? 
+static PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("+").unwrap()); +static MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("-").unwrap()); +static PREFIX_MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| MINUS.next.as_ref().unwrap()); - static ref TIMES_SIGN: &'static OperatorInfo = OPERATORS.get("×").unwrap(); +static TIMES_SIGN: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("×").unwrap()); - // IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority - static ref IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 851, next: &None - }; - // IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC - static ref IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 901, next: &None - }; - // IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds - static ref IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 905, next: &None - }; - static ref IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4 - op_type: OperatorTypes::INFIX, priority: 881, next: &None - }; +// IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority +static IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 851, next: &None +}; +// IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC +static IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 901, next: &None +}; +// IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds +static IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 905, next: &None +}; +static 
IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4 + op_type: OperatorTypes::INFIX, priority: 881, next: &None +}; - // Useful static defaults to have available if there is no character match - static ref DEFAULT_OPERATOR_INFO_PREFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::PREFIX, priority: 260, next: &None - }; - static ref DEFAULT_OPERATOR_INFO_INFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 260, next:& None - }; - static ref DEFAULT_OPERATOR_INFO_POSTFIX: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::POSTFIX, priority: 260, next: &None - }; +// Useful static defaults to have available if there is no character match +static DEFAULT_OPERATOR_INFO_PREFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::PREFIX, priority: 260, next: &None +}; +static DEFAULT_OPERATOR_INFO_INFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 260, next:& None +}; +static DEFAULT_OPERATOR_INFO_POSTFIX: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::POSTFIX, priority: 260, next: &None +}; - // avoids having to use Option in some cases - static ref ILLEGAL_OPERATOR_INFO: &'static OperatorInfo = &OperatorInfo{ - op_type: OperatorTypes::INFIX, priority: 999, next: &None - }; +// avoids having to use Option in some cases +static ILLEGAL_OPERATOR_INFO: OperatorInfo = OperatorInfo{ + op_type: OperatorTypes::INFIX, priority: 999, next: &None +}; - // used to tell if an operator is a relational operator - static ref EQUAL_PRIORITY: usize = OPERATORS.get("=").unwrap().priority; +// used to tell if an operator is a relational operator +static EQUAL_PRIORITY: LazyLock = LazyLock::new(|| OPERATORS.get("=").unwrap().priority); - // useful for detecting whitespace - static ref IS_WHITESPACE: Regex = Regex::new(r"^\s+$").unwrap(); // only Unicode whitespace -} +// useful for detecting whitespace +static IS_WHITESPACE: LazyLock = 
LazyLock::new(|| Regex::new(r"^\s+$").unwrap()); // only Unicode whitespace // Operators are either PREFIX, INFIX, or POSTFIX, but can also have other properties such as LEFT_FENCE bitflags! { @@ -295,7 +294,7 @@ impl<'a, 'op:'a> StackInfo<'a, 'op> { // debug!(" adding '{}' to mrow[{}], operator '{}/{}'", // element_summary(child), self.mrow.children().len(), show_invisible_op_char(child_op.ch), child_op.op.priority); self.mrow.append_child(child); - if ptr_eq(child_op.op, *ILLEGAL_OPERATOR_INFO) { + if ptr_eq(child_op.op, &ILLEGAL_OPERATOR_INFO) { assert!(!self.is_operand); // should not have two operands in a row (ok to add whitespace) self.is_operand = true; } else { @@ -444,12 +443,10 @@ static EMPTY_ELEMENTS: phf::Set<&str> = phf_set! { "mspace", "none", "mprescripts", "mglyph", "malignmark", "maligngroup", "msline", }; -lazy_static! { - // turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match - // two cases because we don't want to have a match for 'Cl', etc. - static ref UPPER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap(); - static ref LOWER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap(); -} +// turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match +// two cases because we don't want to have a match for 'Cl', etc. +static UPPER_ROMAN_NUMERAL: LazyLock = LazyLock::new(|| Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap()); +static LOWER_ROMAN_NUMERAL: LazyLock = LazyLock::new(|| Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap()); struct CanonicalizeContextPatterns { @@ -740,12 +737,10 @@ impl CanonicalizeContext { /// Returns 'None' if the element should not be in the tree. 
fn clean_mathml<'a>(&self, mathml: Element<'a>) -> Option> { // Note: this works bottom-up (clean the children first, then this element) - lazy_static! { - static ref IS_PRIME: Regex = Regex::new(r"['′″‴⁗]").unwrap(); + static IS_PRIME: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"['′″‴⁗]").unwrap()); - // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _") - static ref IS_UNDERSCORE: Regex = Regex::new(r"^[_\u{00A0}]+$").unwrap(); - } + // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _") + static IS_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[_\u{00A0}]+$").unwrap()); static CURRENCY_SYMBOLS: phf::Set<&str> = phf_set! { @@ -1262,9 +1257,7 @@ impl CanonicalizeContext { fn is_proportional_before_colon<'a>(siblings: impl Iterator>) -> Option { // unparsed, so we look at relative priorities to make sure the proportional operator is really the next operator - lazy_static!{ - static ref PROPORTIONAL_PRIORITY: usize = OPERATORS.get("∷").unwrap().priority; - } + static PROPORTIONAL_PRIORITY: LazyLock<usize> = LazyLock::new(|| OPERATORS.get("∷").unwrap().priority); for sibling in siblings { let child = as_element(*sibling); if name(child) == "mo" { @@ -1381,9 +1374,7 @@ impl CanonicalizeContext { /// looks for pairs of (letter, pseudoscript) such as x' or p'q' all inside of a single token element fn split_apart_pseudo_scripts<'a>(mi: Element<'a>) -> Option> { - lazy_static!{ - static ref IS_DEGREES_C_or_F: Regex = Regex::new(r"[°º][CF]").unwrap(); - } + static IS_DEGREES_C_or_F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[°º][CF]").unwrap()); let text = as_text(mi); // debug!("split_apart_pseudo_scripts: start text=\"{text}\""); @@ -1974,9 +1965,7 @@ impl CanonicalizeContext { /// under some specific conditions (trying to be a little cautious). 
/// The returned (mrow) element reuses the arg so tree siblings links remain correct. fn split_points(leaf: Element) -> Option { - lazy_static!{ - static ref IS_UPPERCASE: Regex = Regex::new(r"^[A-Z]+$").unwrap(); - } + static IS_UPPERCASE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[A-Z]+$").unwrap()); if !IS_UPPERCASE.is_match(as_text(leaf)) { return None; @@ -2178,9 +2167,7 @@ impl CanonicalizeContext { // Check if start..end is a number fn is_likely_a_number(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool { - lazy_static! { - static ref IS_HEX_BLOCK: Regex = Regex::new("[a-eh-z]").unwrap(); - } + static IS_HEX_BLOCK: LazyLock<Regex> = LazyLock::new(|| Regex::new("[a-eh-z]").unwrap()); // Note: the children of math_or_mrow aren't valid ('children' represents the current state) let end = children.len(); // { @@ -3260,7 +3247,7 @@ impl CanonicalizeContext { } fn canonicalize_mo_text(&self, mo: Element) { - // lazy_static! { + // lazy_static! { (NOTE: std::sync::LazyLock is now used instead) // static ref IS_LIKELY_SCALAR_VARIABLE: Regex = Regex::new("[a-eh-z]").unwrap(); // } @@ -3353,7 +3340,7 @@ impl CanonicalizeContext { }; let found_op_info = if mo_node.attribute_value(CHEMICAL_BOND).is_some() { - Some(&*IMPLIED_CHEMICAL_BOND) + Some(&IMPLIED_CHEMICAL_BOND) } else { OPERATORS.get(as_text(mo_node)) }; @@ -3364,7 +3351,7 @@ impl CanonicalizeContext { let found_op_info = found_op_info.unwrap(); let matching_op_info = find_operator_info(found_op_info, op_type, form.is_some()); - if ptr_eq(matching_op_info, *ILLEGAL_OPERATOR_INFO) { + if ptr_eq(matching_op_info, &ILLEGAL_OPERATOR_INFO) { return op_not_in_operator_dictionary(op_type); } else { return matching_op_info; @@ -4113,7 +4100,7 @@ impl CanonicalizeContext { } return false; } - return ptr_eq(op_on_top.op, &*IMPLIED_TIMES_HIGH_PRIORITY); + return ptr_eq(op_on_top.op, &IMPLIED_TIMES_HIGH_PRIORITY); fn is_trig(node: Element) -> bool { let base_of_name = 
get_possible_embellished_node(node); @@ -4223,17 +4210,17 @@ impl CanonicalizeContext { base_of_child.remove_attribute("data-was-mo"); set_mathml_name(base_of_child, "mo"); let mut top_of_stack = parse_stack.pop().unwrap(); - top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: &INVISIBLE_FUNCTION_APPLICATION}); // whitespace -- make part of mrow to keep out of parse + top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: *INVISIBLE_FUNCTION_APPLICATION}); // whitespace -- make part of mrow to keep out of parse parse_stack.push(top_of_stack); continue; } // consecutive operands -- add an invisible operator as appropriate current_op = if likely_function_name == FunctionNameCertainty::True { - OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION } + OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION } } else if self.is_mixed_fraction(previous_child, &children[i_child..])? { - OperatorPair{ ch: "\u{2064}", op: &IMPLIED_INVISIBLE_PLUS } + OperatorPair{ ch: "\u{2064}", op: *IMPLIED_INVISIBLE_PLUS } } else if self.is_implied_comma(previous_child, current_child, mrow) { - OperatorPair{ch: "\u{2063}", op: &IMPLIED_INVISIBLE_COMMA } + OperatorPair{ch: "\u{2063}", op: *IMPLIED_INVISIBLE_COMMA } } else if self.is_implied_chemical_bond(previous_child, current_child) { OperatorPair{ch: "\u{2063}", op: &IMPLIED_CHEMICAL_BOND } } else if self.is_implied_separator(previous_child, current_child) { @@ -4241,7 +4228,7 @@ impl CanonicalizeContext { } else if self.is_trig_arg(base_of_previous_child, base_of_child, &mut parse_stack) { OperatorPair{ch: "\u{2062}", op: &IMPLIED_TIMES_HIGH_PRIORITY } } else { - OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES } + OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES } }; if let Some(attr_val) = base_of_child.attribute_value(CHANGED_ATTR) && attr_val == "data-was-mo" { @@ -4272,16 +4259,16 @@ impl CanonicalizeContext { } } - if !ptr_eq(current_op.op, 
*ILLEGAL_OPERATOR_INFO) { + if !ptr_eq(current_op.op, &ILLEGAL_OPERATOR_INFO) { if current_op.op.is_left_fence() || current_op.op.is_prefix() { if top(&parse_stack).is_operand { // will end up with operand operand -- need to choose operator associated with prev child // we use the original input here because in this case, we need to look to the right of the ()s to deal with chemical states let likely_function_name = self.is_function_name(as_element(children[i_child-1]), Some(&children[i_child..])); let implied_operator = if likely_function_name== FunctionNameCertainty::True { - OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION } + OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION } } else { - OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES } + OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES } }; // debug!(" adding implied {}", if ptr_eq(implied_operator.op,*IMPLIED_TIMES) {"times"} else {"function apply"}); diff --git a/src/lib.rs b/src/lib.rs index ac7db772..7397c4a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,9 +23,6 @@ pub mod errors { pub use anyhow::{anyhow, bail, Error, Result, Context}; } -#[macro_use] -extern crate lazy_static; - #[macro_use] extern crate bitflags;