From 6792e38448d6542b24f783db16651ac7c904d532 Mon Sep 17 00:00:00 2001 From: null8626 Date: Sun, 4 Feb 2024 13:19:35 +0700 Subject: [PATCH 1/6] refactor: code cleanup + clippy --- src/char_data/mod.rs | 5 +---- src/char_data/tables.rs | 4 ++-- src/explicit.rs | 5 +---- src/implicit.rs | 33 ++++++++++++++------------------- src/level.rs | 20 ++++++++++++-------- src/lib.rs | 18 +++++++++--------- src/prepare.rs | 28 ++++++++++------------------ src/utf16.rs | 12 ++++++------ 8 files changed, 55 insertions(+), 70 deletions(-) diff --git a/src/char_data/mod.rs b/src/char_data/mod.rs index 4edf5b8..543b0ed 100644 --- a/src/char_data/mod.rs +++ b/src/char_data/mod.rs @@ -59,10 +59,7 @@ pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option bool { - match bidi_class { - RLE | RLO | RLI => true, - _ => false, - } + matches!(bidi_class, RLE | RLO | RLI) } #[cfg(feature = "hardcoded-data")] diff --git a/src/char_data/tables.rs b/src/char_data/tables.rs index ecdcf49..f10265d 100644 --- a/src/char_data/tables.rs +++ b/src/char_data/tables.rs @@ -45,7 +45,7 @@ pub enum BidiClass { use self::BidiClass::*; #[cfg(feature = "hardcoded-data")] -pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ +pub const bidi_class_table: &[(char, char, BidiClass)] = &[ ('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S), ('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}', @@ -516,7 +516,7 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ '\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L) ]; -pub const bidi_pairs_table: &'static [(char, char, Option)] = &[ +pub const bidi_pairs_table: &[(char, char, Option)] = &[ ('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}', '\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}', '\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}', diff --git a/src/explicit.rs b/src/explicit.rs index d4ad897..5e78f28 100644 --- a/src/explicit.rs +++ b/src/explicit.rs @@ -52,10 +52,7 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( levels[i] = last_level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. - let is_isolate = match original_classes[i] { - RLI | LRI | FSI => true, - _ => false, - }; + let is_isolate = matches!(original_classes[i], RLI | LRI | FSI); if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. // levels[i] = last_level; diff --git a/src/implicit.rs b/src/implicit.rs index 0311053..8b665b4 100644 --- a/src/implicit.rs +++ b/src/implicit.rs @@ -224,22 +224,20 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( // W7. If the previous strong char was L, change EN to L. let mut last_strong_is_l = sequence.sos == L; - for run in &sequence.runs { - for i in run.clone() { - match processing_classes[i] { - EN if last_strong_is_l => { - processing_classes[i] = L; - } - L => { - last_strong_is_l = true; - } - R | AL => { - last_strong_is_l = false; - } - // - // Already scanning past BN here. - _ => {} + for i in sequence.runs.iter().cloned().flatten() { + match processing_classes[i] { + EN if last_strong_is_l => { + processing_classes[i] = L; + } + L => { + last_strong_is_l = true; + } + R | AL => { + last_strong_is_l = false; } + // + // Already scanning past BN here. + _ => {} } } } @@ -578,8 +576,5 @@ pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> L /// #[allow(non_snake_case)] fn is_NI(class: BidiClass) -> bool { - match class { - B | S | WS | ON | FSI | LRI | RLI | PDI => true, - _ => false, - } + matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) } diff --git a/src/level.rs b/src/level.rs index ef4f6d9..81b327a 100644 --- a/src/level.rs +++ b/src/level.rs @@ -13,10 +13,14 @@ //! //! -use alloc::string::{String, ToString}; -use alloc::vec::Vec; -use core::convert::{From, Into}; -use core::slice; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; +use core::{ + convert::{From, Into}, + slice, +}; use super::char_data::BidiClass; @@ -219,11 +223,11 @@ pub fn has_rtl(levels: &[Level]) -> bool { levels.iter().any(|&lvl| lvl.is_rtl()) } -impl Into for Level { +impl From for u8 { /// Convert to the level number #[inline] - fn into(self) -> u8 { - self.number() + fn from(val: Level) -> Self { + val.number() } } @@ -244,7 +248,7 @@ impl<'a> PartialEq<&'a str> for Level { } /// Used for matching levels in conformance tests -impl<'a> PartialEq for Level { +impl PartialEq for Level { #[inline] fn eq(&self, s: &String) -> bool { self == &s.as_str() diff --git a/src/lib.rs b/src/lib.rs index 1072b67..7175a01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -855,12 +855,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text str, +fn reorder_line( + text: &str, line: Range, levels: Vec, runs: Vec, -) -> Cow<'text, str> { +) -> Cow<'_, str> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -1122,20 +1122,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>( B | S => { assert_eq!(reset_to, None); reset_to = Some(i + T::char_len(c)); - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // // same as above + set the level RLE | LRE | RLO | LRO | PDF | BN => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } // also set the level to previous @@ -1294,8 +1294,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut isolate_level = 0; for c in text.chars() { match data_source.bidi_class(c) { - LRI | RLI | FSI => isolate_level = isolate_level + 1, - PDI if isolate_level > 0 => isolate_level = isolate_level - 1, + LRI | RLI | FSI => isolate_level += 1, + PDI if isolate_level > 0 => isolate_level -= 1, L if isolate_level == 0 => return Direction::Ltr, R | AL if isolate_level == 0 => return Direction::Rtl, B if !use_full_text => break, @@ -1342,7 +1342,7 @@ impl<'text> TextSource<'text> for str { } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf8IndexLenIter::new(&self) + Utf8IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { diff --git a/src/prepare.rs b/src/prepare.rs index 49f3b86..70fc774 100644 --- a/src/prepare.rs +++ b/src/prepare.rs @@ -55,7 +55,7 @@ pub fn isolating_run_sequences( let mut stack = vec![Vec::new()]; for run in runs { - assert!(run.len() > 0); + assert!(!run.is_empty()); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; @@ -67,8 +67,7 @@ pub fn isolating_run_sequences( .iter() .copied() .rev() - .filter(not_removed_by_x9) - .next() + .find(not_removed_by_x9) .unwrap_or(start_class); let mut sequence = if start_class == PDI && stack.len() > 1 { @@ -110,21 +109,17 @@ pub fn isolating_run_sequences( let end_of_seq = result.runs[runs_len - 1].end; // > (not counting characters removed by X9) - let seq_level = result + let seq_level = levels[result .iter_forwards_from(start_of_seq, 0) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[start_of_seq]); + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(start_of_seq)]; // XXXManishearth the spec talks of a start and end level, // but for a given IRS the two should be equivalent, yes? - let end_level = result + let end_level = levels[result .iter_backwards_from(end_of_seq, runs_len - 1) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[end_of_seq - 1]); + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(end_of_seq - 1)]; #[cfg(test)] for run in result.runs.clone() { @@ -156,7 +151,7 @@ pub fn isolating_run_sequences( .unwrap_or(BN); // Get the level of the next non-removed char after the runs. - let succ_level = if let RLI | LRI | FSI = last_non_removed { + let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { para_level } else { match original_classes[end_of_seq..] @@ -246,10 +241,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec /// /// pub fn removed_by_x9(class: BidiClass) -> bool { - match class { - RLE | LRE | RLO | LRO | PDF | BN => true, - _ => false, - } + matches!(class, RLE | LRE | RLO | LRO | PDF | BN) } // For use as a predicate for `position` / `rposition` diff --git a/src/utf16.rs b/src/utf16.rs index dcd9baf..02f83aa 100644 --- a/src/utf16.rs +++ b/src/utf16.rs @@ -551,12 +551,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text [u16], +fn reorder_line( + text: &[u16], line: Range, levels: Vec, runs: Vec, -) -> Cow<'text, [u16]> { +) -> Cow<'_, [u16]> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -668,15 +668,15 @@ impl<'text> TextSource<'text> for [u16] { } #[inline] fn chars(&'text self) -> Self::CharIter { - Utf16CharIter::new(&self) + Utf16CharIter::new(self) } #[inline] fn char_indices(&'text self) -> Self::CharIndexIter { - Utf16CharIndexIter::new(&self) + Utf16CharIndexIter::new(self) } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf16IndexLenIter::new(&self) + Utf16IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { From c2e82a6a7b07d0cfda5e2b9d43499ed46d778a5a Mon Sep 17 00:00:00 2001 From: null8626 Date: Sun, 4 Feb 2024 13:35:27 +0700 Subject: [PATCH 2/6] refactor: remove DirectionalStatusStack and use Vec instead --- src/explicit.rs | 86 ++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/src/explicit.rs b/src/explicit.rs index 5e78f28..d02692e 100644 --- a/src/explicit.rs +++ b/src/explicit.rs @@ -11,8 +11,6 @@ //! //! -use alloc::vec::Vec; - use super::char_data::{ is_rtl, BidiClass::{self, *}, @@ -35,28 +33,30 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( assert_eq!(text.len(), original_classes.len()); // - let mut stack = DirectionalStatusStack::new(); - stack.push(para_level, OverrideStatus::Neutral); + let mut stack = vec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; for (i, len) in text.indices_lengths() { + let last = stack.last().unwrap(); + match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { - let last_level = stack.last().level; - // - levels[i] = last_level; + levels[i] = last.level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. let is_isolate = matches!(original_classes[i], RLI | LRI | FSI); if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. - // levels[i] = last_level; - match stack.last().status { + // levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} @@ -64,22 +64,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( } let new_level = if is_rtl(original_classes[i]) { - last_level.new_explicit_next_rtl() + last.level.new_explicit_next_rtl() } else { - last_level.new_explicit_next_ltr() + last.level.new_explicit_next_ltr() }; + if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); - stack.push( - new_level, - match original_classes[i] { + + stack.push(Status { + level: new_level, + status: match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, - ); + }); + if is_isolate { valid_isolate_count += 1; } else { @@ -107,21 +110,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; - loop { - // Pop everything up to and including the last Isolate status. - match stack.vec.pop() { - None - | Some(Status { - status: OverrideStatus::Isolate, - .. - }) => break, - _ => continue, - } - } + + while !matches!( + stack.pop(), + None | Some(Status { + status: OverrideStatus::Isolate, + .. + }) + ) {} + valid_isolate_count -= 1; } - let last = stack.last(); + + let last = stack.last().unwrap(); levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, @@ -135,11 +138,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // do nothing } else if overflow_embedding_count > 0 { overflow_embedding_count -= 1; - } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { - stack.vec.pop(); + } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 { + stack.pop(); } + // - levels[i] = stack.last().level; + levels[i] = stack.last().unwrap().level; // X9 part of retaining explicit formatting characters. processing_classes[i] = BN; } @@ -150,8 +154,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // _ => { - let last = stack.last(); levels[i] = last.level; + // This condition is not in the spec, but I am pretty sure that is a spec bug. // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf if original_classes[i] != BN { @@ -185,23 +189,3 @@ enum OverrideStatus { LTR, Isolate, } - -struct DirectionalStatusStack { - vec: Vec, -} - -impl DirectionalStatusStack { - fn new() -> Self { - DirectionalStatusStack { - vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2), - } - } - - fn push(&mut self, level: Level, status: OverrideStatus) { - self.vec.push(Status { level, status }); - } - - fn last(&self) -> &Status { - self.vec.last().unwrap() - } -} From 0f72f1ce80163d72e2128b2c5484443ded816d70 Mon Sep 17 00:00:00 2001 From: null8626 Date: Sun, 4 Feb 2024 13:41:14 +0700 Subject: [PATCH 3/6] refactor: more misc cleanups --- src/deprecated.rs | 7 +++---- src/prepare.rs | 12 +++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/deprecated.rs b/src/deprecated.rs index 74a24f5..34aae01 100644 --- a/src/deprecated.rs +++ b/src/deprecated.rs @@ -71,10 +71,8 @@ pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; - while seq_end < run_count { - if levels[runs[seq_end].start] < max_level { - break; - } + + while seq_end < run_count && levels[runs[seq_end].start] >= max_level { seq_end += 1; } @@ -83,6 +81,7 @@ pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { seq_start = seq_end; } + max_level .lower(1) .expect("Lowering embedding level below zero"); diff --git a/src/prepare.rs b/src/prepare.rs index 70fc774..e4397b8 100644 --- a/src/prepare.rs +++ b/src/prepare.rs @@ -52,7 +52,7 @@ pub fn isolating_run_sequences( // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. - let mut stack = vec![Vec::new()]; + let mut stack = vec![vec![]]; for run in runs { assert!(!run.is_empty()); @@ -80,7 +80,7 @@ pub fn isolating_run_sequences( sequence.push(run); - if let RLI | LRI | FSI = end_class { + if matches!(end_class, RLI | LRI | FSI) { // Resume this sequence after the isolate. stack.push(sequence); } else { @@ -122,11 +122,9 @@ pub fn isolating_run_sequences( .unwrap_or(end_of_seq - 1)]; #[cfg(test)] - for run in result.runs.clone() { - for idx in run { - if not_removed_by_x9(&original_classes[idx]) { - assert_eq!(seq_level, levels[idx]); - } + for idx in result.runs.clone().into_iter().flatten() { + if not_removed_by_x9(&original_classes[idx]) { + assert_eq!(seq_level, levels[idx]); } } From 428e9e97a30e44eae78023c41b6b16261decc95f Mon Sep 17 00:00:00 2001 From: null8626 Date: Sun, 4 Feb 2024 18:05:35 +0700 Subject: [PATCH 4/6] refactor: more use of matches! --- src/implicit.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/implicit.rs b/src/implicit.rs index 8b665b4..45b3ad3 100644 --- a/src/implicit.rs +++ b/src/implicit.rs @@ -306,7 +306,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( found_e = true; } else if class == not_e { found_not_e = true; - } else if class == BidiClass::EN || class == BidiClass::AN { + } else if matches!(class, BidiClass::EN | BidiClass::AN) { // > Within this scope, bidirectional types EN and AN are treated as R. if e == BidiClass::L { found_not_e = true; @@ -335,10 +335,10 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( .iter_backwards_from(pair.start, pair.start_run) .map(|i| processing_classes[i]) .find(|class| { - *class == BidiClass::L - || *class == BidiClass::R - || *class == BidiClass::EN - || *class == BidiClass::AN + matches!( + class, + BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN + ) }) .unwrap_or(sequence.sos); From 8e1cb3de2f11fdca04f937c42e9d5d57fb2e4aa6 Mon Sep 17 00:00:00 2001 From: null8626 Date: Sun, 4 Feb 2024 18:09:18 +0700 Subject: [PATCH 5/6] refactor: use matches! here too --- src/implicit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/implicit.rs b/src/implicit.rs index 45b3ad3..5633aeb 100644 --- a/src/implicit.rs +++ b/src/implicit.rs @@ -343,7 +343,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( .unwrap_or(sequence.sos); // > Within this scope, bidirectional types EN and AN are treated as R. - if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN { + if matches!(previous_strong, BidiClass::EN | BidiClass::AN) { previous_strong = BidiClass::R; } From 1a7bd9ca47c40abdf13950f05069501dc2a9aece Mon Sep 17 00:00:00 2001 From: null8626 Date: Tue, 6 Feb 2024 00:30:13 +0700 Subject: [PATCH 6/6] meta: update MSRV --- .github/workflows/main.yml | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57d4361..303bac8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - rust: [1.36.0, stable, beta, nightly] + rust: [1.47.0, stable, beta, nightly] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -18,7 +18,7 @@ jobs: with: toolchain: ${{ matrix.rust }} - name: Unpin dependencies except on MSRV - if: matrix.rust != '1.36.0' + if: matrix.rust != '1.47.0' run: cargo update - run: cargo build --all-targets - run: cargo test diff --git a/Cargo.toml b/Cargo.toml index 4d1e014..24f110c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ documentation = "https://docs.rs/unicode-bidi/" keywords = ["rtl", "unicode", "text", "layout", "bidi"] readme="README.md" edition = "2018" -rust-version = "1.36.0" +rust-version = "1.47.0" categories = [ "no-std", "encoding",