Skip to content

Commit

Permalink
Implemented more efficient test str and test str any
Browse files Browse the repository at this point in the history
  • Loading branch information
vallentin committed Jul 10, 2023
1 parent fb001be commit d2476e6
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 21 deletions.
7 changes: 7 additions & 0 deletions text-scanner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ ext = []

[dependencies]
char-ranges = "0.1"

[dev-dependencies]
criterion = "0.5"

[[bench]]
name = "accept_vs_test"
harness = false
43 changes: 43 additions & 0 deletions text-scanner/benches/accept_vs_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use criterion::{criterion_group, criterion_main, Criterion};
use text_scanner::{Scanner, ScannerItem};

/// Benchmarks the `accept_*` string matchers against their `test_*`
/// counterparts on the same input.
///
/// Two groups are registered: `"str"` compares `accept_str` with `test_str`,
/// and `"str_any"` compares `accept_str_any` with `test_str_any`. Every
/// iteration operates on a fresh clone of one shared scanner, so the cost of
/// the clone is included equally in all four measurements.
fn bench_accept_vs_test(c: &mut Criterion) {
    let base = Scanner::new("// Hello World");

    // Only the last candidate is a prefix of the input, so the `*_any`
    // variants have to reject three longer candidates before matching.
    let candidates = ["// foo", "// bar", "// baz", "//"];

    {
        let mut single = c.benchmark_group("str");
        single.bench_function("accept_str", |bencher| {
            bencher.iter(|| -> ScannerItem<&'_ str> { base.clone().accept_str("//").unwrap() });
        });
        single.bench_function("test_str", |bencher| {
            bencher.iter(|| -> ScannerItem<&'_ str> { base.clone().test_str("//").unwrap() });
        });
        single.finish();
    }

    {
        let mut any = c.benchmark_group("str_any");
        any.bench_function("accept_str_any", |bencher| {
            bencher.iter(|| -> ScannerItem<&'_ str> {
                base.clone().accept_str_any(&candidates).unwrap()
            });
        });
        any.bench_function("test_str_any", |bencher| {
            bencher.iter(|| -> ScannerItem<&'_ str> {
                base.clone().test_str_any(&candidates).unwrap()
            });
        });
        any.finish();
    }
}

criterion_group!(benches, bench_accept_vs_test);
criterion_main!(benches);
113 changes: 92 additions & 21 deletions text-scanner/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,22 @@ impl<'text> Scanner<'text> {
self.set_cursor_pos(0)
}

/// Runs `f` against a temporary clone of this scanner and returns
/// whatever `f` returns.
///
/// The clone starts out with the same [`text()`], [`remaining_text()`],
/// and [`cursor_pos()`] as `self`. Since `self` is only borrowed
/// immutably, any cursor movement performed inside `f` happens on the
/// clone and is discarded when `f` returns.
///
/// [`text()`]: Self::text
/// [`remaining_text()`]: Self::remaining_text
/// [`cursor_pos()`]: Self::cursor_pos
#[inline]
pub fn peeking<T, F>(&self, f: F) -> T
where
    F: FnOnce(&mut Self) -> T,
{
    // The clone lives only for the duration of the call.
    f(&mut self.clone())
}

/// Advances the scanner cursor and returns the next
/// [`char`] and its [`Range`], if any.
///
Expand Down Expand Up @@ -651,6 +667,68 @@ impl<'text> Scanner<'text> {
}
}

/// Shared driver for the `test_*` methods.
///
/// `f` inspects the scanner without mutating it and, on a match, returns
/// the matched length in bytes together with the value to hand back.
///
/// - On `Some((len, value))` the cursor is advanced by `len` and
///   `Ok((start..end, value))` is returned, where `start` is the cursor
///   before the call and `end` is the cursor after it.
/// - On `None` the cursor is left untouched and the error is always
///   `Err((cursor..cursor, ""))`.
#[inline]
fn test<T, F>(&mut self, f: F) -> ScannerResult<'text, T>
where
    F: FnOnce(&Self) -> Option<(usize, T)>,
{
    // `f` only receives `&Self`, so the cursor read here is still the
    // current one once `f` has returned.
    let start = self.cursor;
    if let Some((len_utf8, matched)) = f(self) {
        let end = start + len_utf8;
        self.cursor = end;
        Ok((start..end, matched))
    } else {
        Err((start..start, ""))
    }
}

/// A cheaper alternative to [`accept_str()`] for callers that do not
/// need a detailed error value.
///
/// If the remaining text starts with `expected`, the cursor is advanced
/// past it and the matched slice of the original text is returned along
/// with its [`Range`]. Otherwise the cursor is not moved and the error
/// is always <code>Err(([cursor]..[cursor], &quot;&quot;))</code>.
///
/// The saving comes from not tracking the longest matching substring
/// and its [`Range`] for the error value.
///
/// **Note:** No emptiness check is performed here; an empty `expected`
/// matches trivially and yields an empty match without moving the cursor.
///
/// [`accept_str()`]: Self::accept_str
/// [cursor]: Self::cursor_pos
#[inline]
pub fn test_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str> {
    self.test(|scanner| {
        let remaining = scanner.remaining_text();
        if !remaining.starts_with(expected) {
            return None;
        }
        // Slice the scanner's own text, so that the returned string
        // carries the `'text` lifetime instead of borrowing `expected`.
        let len = expected.len();
        Some((len, &remaining[..len]))
    })
}

/// A cheaper alternative to [`accept_str_any()`] for callers that do
/// not need a detailed error value.
///
/// The candidates in `expected` are tried in order, and the first one
/// that the remaining text starts with wins: the cursor is advanced past
/// it and the matched slice of the original text is returned along with
/// its [`Range`]. If no candidate matches, the cursor is not moved and
/// the error is always <code>Err(([cursor]..[cursor], &quot;&quot;))</code>.
///
/// The saving comes from not tracking the longest matching substring
/// and its [`Range`] for the error value.
///
/// [`accept_str_any()`]: Self::accept_str_any
/// [cursor]: Self::cursor_pos
#[inline]
pub fn test_str_any(&mut self, expected: &[&str]) -> ScannerResult<'text, &'text str> {
    self.test(|scanner| {
        let remaining = scanner.remaining_text();
        expected
            .iter()
            .copied()
            // `find` stops at the first hit, preserving the caller's ordering.
            .find(|&candidate| remaining.starts_with(candidate))
            .map(|candidate| {
                // Slice the scanner's own text, so that the returned string
                // carries the `'text` lifetime instead of borrowing `expected`.
                let len = candidate.len();
                (len, &remaining[..len])
            })
    })
}

/// Advances the scanner cursor and returns the next
/// [`char`] and its [`Range`], if `f(c)` returns `true`
/// where `c` is the next character.
Expand Down Expand Up @@ -765,13 +843,16 @@ impl<'text> Scanner<'text> {
/// in `expected`. If not, then an `Err` is returned, with the longest
/// matching substring and its [`Range`].
///
/// If `expected` is only 1 character, then use [`accept_char()`]
/// instead.
///
/// If the `Err` value is not needed, then use [`test_str()`]
/// instead for a more efficient test.
///
/// **Note:** The returned string slice has the same lifetime as
/// the original `text`, so the scanner can continue to be used
/// while this exists.
///
/// If `expected` is only 1 character, then use [`accept_char()`]
/// instead.
///
/// # Panics
///
/// Panics in non-optimized builds, if `expected` is [empty].
Expand Down Expand Up @@ -799,6 +880,7 @@ impl<'text> Scanner<'text> {
/// ```
///
/// [`accept_char()`]: Self::accept_char
/// [`test_str()`]: Self::test_str
/// [cursor]: Self::cursor_pos
/// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
pub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str> {
Expand Down Expand Up @@ -838,13 +920,16 @@ impl<'text> Scanner<'text> {
/// order of the strings into longest-to-shortest order,
/// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
///
/// If `expected` only contains 1 character strings, then use
/// [`accept_char_any()`] instead.
///
/// If the `Err` value is not needed, then use [`test_str_any()`]
/// instead for a more efficient test.
///
/// **Note:** The returned string slice has the same lifetime as
/// the original `text`, so the scanner can continue to be used
/// while this exists.
///
/// If `expected` only contains 1 character strings, then use
/// [`accept_char_any()`] instead.
///
/// # Panics
///
/// Panics in non-optimized builds, if `expected` is [empty],
Expand Down Expand Up @@ -878,6 +963,7 @@ impl<'text> Scanner<'text> {
/// ```
///
/// [`accept_char_any()`]: Self::accept_char_any
/// [`test_str_any()`]: Self::test_str_any
/// [cursor]: Self::cursor_pos
/// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
/// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
Expand Down Expand Up @@ -1387,21 +1473,6 @@ impl<'text> Scanner<'text> {
}
}

/// Calls `f` with a <code>&mut [Scanner]</code> of this
/// <code>&[Scanner]</code>, i.e. a [`Scanner`] with the
/// same [`text()`], [`remaining_text()`], and [`cursor_pos()`].
///
/// [`text()`]: Self::text
/// [`remaining_text()`]: Self::remaining_text
/// [`cursor_pos()`]: Self::cursor_pos
pub fn peeking<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut Self) -> T,
{
let mut scanner = self.clone();
f(&mut scanner)
}

/// This function accepts the following formats:
///
/// - `0`
Expand Down

0 comments on commit d2476e6

Please sign in to comment.