From 19d8f2dbc356b837cd32af6ba909740b69ac7ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Dupr=C3=A9?= Date: Wed, 7 Feb 2024 19:16:50 +0100 Subject: [PATCH] Make `slice_str` similar to `truncate_str` --- src/ansi.rs | 90 ---------------------------- src/lib.rs | 6 +- src/utils.rs | 165 ++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 121 insertions(+), 140 deletions(-) diff --git a/src/ansi.rs b/src/ansi.rs index eb7ccff0..3a3c96c3 100644 --- a/src/ansi.rs +++ b/src/ansi.rs @@ -4,8 +4,6 @@ use std::{ str::CharIndices, }; -use crate::utils::char_width; - #[derive(Debug, Clone, Copy)] enum State { Start, @@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> { impl<'a> FusedIterator for AnsiCodeIterator<'a> {} -/// Slice a `&str` in terms of text width. This means that only the text -/// columns strictly between `start` and `stop` will be kept. -/// -/// If a multi-columns character overlaps with the end of the interval it will -/// not be included. In such a case, the result will be less than `end - start` -/// columns wide. -pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str { - if end <= start { - return ""; - } - - let mut pos = 0; - let mut res_start = 0; - let mut res_end = 0; - - 'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) { - // As ansi symbols have a width of 0 we can safely early-interupt - // the outer for loop only if current pos strictly greater than - // `end`. - if pos > end { - break; - } - - if is_ansi { - if pos < start { - res_start += sub.len(); - res_end = res_start; - } else if pos <= end { - res_end += sub.len(); - } else { - break 'outer; - } - } else { - for c in sub.chars() { - let c_width = char_width(c); - - if pos < start { - res_start += c.len_utf8(); - res_end = res_start; - } else if pos + c_width <= end { - res_end += c.len_utf8(); - } else { - break 'outer; - } - - pos += char_width(c); - } - } - } - - &s[res_start..res_end] -} - #[cfg(test)] mod tests { - use crate::measure_text_width; - use super::*; use lazy_static::lazy_static; @@ -492,37 +435,4 @@ mod tests { assert_eq!(iter.rest_slice(), ""); assert_eq!(iter.next(), None); } - - #[test] - fn test_slice_ansi_str() { - // Note that 🐶 is two columns wide - let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; - assert_eq!(slice_ansi_str(test_str, 5, 5), ""); - assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str); - - if cfg!(feature = "unicode-width") { - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m"); - assert_eq!(measure_text_width(test_str), 16); - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m"); - assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!"); - assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!"); - assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!"); - - assert_eq!( - slice_ansi_str(test_str, 4, 9), - "o\x1b[31m🐶\x1b[1m🐶\x1b[0m" - ); - } else { - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m"); - - assert_eq!( - slice_ansi_str(test_str, 4, 9), - "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w" - ); - } - } } diff --git a/src/lib.rs b/src/lib.rs index f57e2c80..a7fbb935 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,12 +82,12 @@ pub use crate::term::{ }; pub use crate::utils::{ colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with, - set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute, - Color, Emoji, Style, StyledObject, + set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment, + Attribute, Color, Emoji, Style, StyledObject, }; #[cfg(feature = "ansi-parsing")] -pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator}; +pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator}; mod common_term; mod kb; diff --git a/src/utils.rs b/src/utils.rs index 868c0b5c..c7448378 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::collections::BTreeSet; use std::env; use std::fmt; +use std::ops::Range; use std::sync::atomic::{AtomicBool, Ordering}; use lazy_static::lazy_static; @@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize { } #[cfg(feature = "ansi-parsing")] -pub(crate) fn char_width(c: char) -> usize { +fn char_width(c: char) -> usize { #[cfg(feature = "unicode-width")] { use unicode_width::UnicodeWidthChar; @@ -737,66 +738,94 @@ pub(crate) fn char_width(c: char) -> usize { } } -/// Truncates a string to a certain number of characters. +/// Slice a `&str` in terms of text width. This means that only the text +/// columns strictly between `start` and `stop` will be kept. +/// +/// If a multi-columns character overlaps with the end of the interval it will +/// not be included. In such a case, the result will be less than `end - start` +/// columns wide. +/// +/// If non-empty head and tail are specified, they are inserted between the +/// ANSI symbols from truncated bounds and the slice. /// /// This ensures that escape codes are not screwed up in the process. -/// If the maximum length is hit the string will be truncated but -/// escapes code will still be honored. If truncation takes place -/// the tail string will be appended. -pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { +pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range, tail: &str) -> Cow<'a, str> { #[cfg(feature = "ansi-parsing")] { - use std::cmp::Ordering; - let mut iter = AnsiCodeIterator::new(s); - let mut length = 0; - let mut rv = None; - - while let Some(item) = iter.next() { - match item { - (s, false) => { - if rv.is_none() { - if str_width(s) + length > width - str_width(tail) { - let ts = iter.current_slice(); - - let mut s_byte = 0; - let mut s_width = 0; - let rest_width = width - str_width(tail) - length; - for c in s.chars() { - s_byte += c.len_utf8(); - s_width += char_width(c); - match s_width.cmp(&rest_width) { - Ordering::Equal => break, - Ordering::Greater => { - s_byte -= c.len_utf8(); - break; - } - Ordering::Less => continue, - } - } - - let idx = ts.len() - s.len() + s_byte; - let mut buf = ts[..idx].to_string(); - buf.push_str(tail); - rv = Some(buf); - } - length += str_width(s); - } + let mut pos = 0; + let mut slice = 0..0; + + // ANSI symbols outside of the slice + let mut front_ansi = String::new(); + let mut back_ansi = String::new(); + + // Iterate through each ANSI symbol or unicode character while keeping + // track of: + // - pos: cumulated width of characters iterated so far + // - slice: char indices of the part of the string for which `pos` + // was inside bounds + for (sub, is_ansi) in AnsiCodeIterator::new(s) { + if is_ansi { + if pos < bounds.start { + // An ANSI symbol before the interval: keep for later + front_ansi.push_str(sub); + slice.start += sub.len(); + slice.end = slice.start; + } else if pos <= bounds.end { + // An ANSI symbol inside of the interval: extend the slice + slice.end += sub.len(); + } else { + // An ANSI symbol after the interval: keep for later + back_ansi.push_str(sub); } - (s, true) => { - if let Some(ref mut rv) = rv { - rv.push_str(s); + } else { + for c in sub.chars() { + let c_width = char_width(c); + + if pos < bounds.start { + // The char is before the interval: move the slice back + slice.start += c.len_utf8(); + slice.end = slice.start; + } else if pos + c_width <= bounds.end { + // The char fits into the interval: extend the slice + slice.end += c.len_utf8(); } + + pos += c_width; } } } - if let Some(buf) = rv { - Cow::Owned(buf) + let slice = &s[slice]; + + if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() { + Cow::Borrowed(slice) } else { - Cow::Borrowed(s) + Cow::Owned(front_ansi + head + slice + tail + &back_ansi) } } + #[cfg(not(feature = "ansi-parsing"))] + { + Cow::Borrowed(s.get(start..end).unwrap_or_default()) + } +} +/// Truncates a string to a certain number of characters. +/// +/// This ensures that escape codes are not screwed up in the process. +/// If the maximum length is hit the string will be truncated but +/// escapes code will still be honored. If truncation takes place +/// the tail string will be appended. +pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { + #[cfg(feature = "ansi-parsing")] + { + if measure_text_width(s) > width { + let tail_width = measure_text_width(tail); + slice_str(s, "", 0..width.saturating_sub(tail_width), tail) + } else { + Cow::Borrowed(s) + } + } #[cfg(not(feature = "ansi-parsing"))] { if s.len() <= width - tail.len() { @@ -919,8 +948,50 @@ fn test_truncate_str() { ); } +#[test] +fn test_slice_ansi_str() { + // Note that 🐶 is two columns wide + let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; + assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str); + + if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") { + assert_eq!(measure_text_width(test_str), 16); + + assert_eq!( + slice_str(test_str, "", 5..5, ""), + "\u{1b}[31m\u{1b}[1m\u{1b}[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..5, ""), + "Hello\x1b[31m\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..6, ""), + "Hello\x1b[31m\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..7, ""), + "Hello\x1b[31m🐶\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 4..9, ""), + "o\x1b[31m🐶\x1b[1m🐶\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 7..21, ""), + "\x1b[31m\x1b[1m🐶\x1b[0m world!" + ); + } +} + #[test] fn test_truncate_str_no_ansi() { + assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar"); assert_eq!(&truncate_str("foo bar", 5, ""), "foo b"); assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !"); assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");