From 2e40d1325b78cac21301e88e9c02af97a16e47d2 Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Wed, 25 May 2022 11:03:51 +0900 Subject: [PATCH] Use length for checking invalid output instead of value (#37) * Use length for checking invalid output instead of value * fix --- src/builder.rs | 6 ++++-- src/charwise/builder.rs | 6 ++++-- src/nfa_builder.rs | 23 +++++------------------ 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 1b4a4f2..5bfeaee 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -1,7 +1,7 @@ use alloc::vec::Vec; use crate::errors::{DaachorseError, Result}; -use crate::nfa_builder::{NfaBuilder, DEAD_STATE_ID, ROOT_STATE_ID, VALUE_INVALID}; +use crate::nfa_builder::{NfaBuilder, DEAD_STATE_ID, ROOT_STATE_ID}; use crate::{ DoubleArrayAhoCorasick, MatchKind, State, DEAD_STATE_IDX, OUTPUT_POS_MAX, ROOT_STATE_IDX, }; @@ -218,10 +218,12 @@ impl DoubleArrayAhoCorasickBuilder { I: IntoIterator, P: AsRef<[u8]>, { + // The following code implicitly replaces large indices with 0, + // but build_with_values() returns an error variant for such iterators. let patvals = patterns .into_iter() .enumerate() - .map(|(i, p)| (p, i.try_into().unwrap_or(VALUE_INVALID))); + .map(|(i, p)| (p, i.try_into().unwrap_or(0))); self.build_with_values(patvals) } diff --git a/src/charwise/builder.rs b/src/charwise/builder.rs index e971ae9..5744535 100644 --- a/src/charwise/builder.rs +++ b/src/charwise/builder.rs @@ -7,7 +7,7 @@ use crate::errors::{DaachorseError, Result}; use crate::nfa_builder::NfaBuilder; use crate::charwise::{DEAD_STATE_IDX, ROOT_STATE_IDX}; -use crate::nfa_builder::{DEAD_STATE_ID, ROOT_STATE_ID, VALUE_INVALID}; +use crate::nfa_builder::{DEAD_STATE_ID, ROOT_STATE_ID}; // Specialized [`NfaBuilder`] handling labels of `char`. type CharwiseNfaBuilder = NfaBuilder; @@ -130,10 +130,12 @@ impl CharwiseDoubleArrayAhoCorasickBuilder { I: IntoIterator, P: AsRef, { + // The following code implicitly replaces large indices with 0, + // but build_with_values() returns an error variant for such iterators. let patvals = patterns .into_iter() .enumerate() - .map(|(i, p)| (p, i.try_into().unwrap_or(VALUE_INVALID))); + .map(|(i, p)| (p, i.try_into().unwrap_or(0))); self.build_with_values(patvals) } diff --git a/src/nfa_builder.rs b/src/nfa_builder.rs index a3de090..ef1a572 100644 --- a/src/nfa_builder.rs +++ b/src/nfa_builder.rs @@ -6,8 +6,6 @@ use alloc::vec::Vec; use crate::errors::{DaachorseError, Result}; use crate::{MatchKind, Output}; -// The maximum value of a pattern used as an invalid value. -pub const VALUE_INVALID: u32 = u32::MAX; // The maximum length of a pattern. pub const LENGTH_INVALID: u32 = 0; // The length used as an invalid value. @@ -50,7 +48,7 @@ impl Default for NfaBuilderState { Self { edges: EdgeMap::::default(), fail: ROOT_STATE_ID, - output: (VALUE_INVALID, LENGTH_INVALID), + output: (0, LENGTH_INVALID), output_pos: None, } } @@ -82,17 +80,6 @@ where #[inline(always)] pub(crate) fn add(&mut self, pattern: &[L], value: u32) -> Result<()> { - // Clippy suggests to use `==` in the following comparison instead, but that is dangerous. - // Since `VALUE_INVALID` is defined as a constant variable, so developers may forget to - // change this operator when they change the constant value. - #[allow(clippy::absurd_extreme_comparisons)] - if value >= VALUE_INVALID { - return Err(DaachorseError::invalid_argument( - "value", - "<", - VALUE_INVALID, - )); - } if pattern.len() > LENGTH_MAX as usize { return Err(DaachorseError::invalid_argument( "pattern.len()", @@ -109,7 +96,7 @@ where if self.match_kind.is_leftmost_first() { // If state_id has an output, the descendants will never searched. let output = &self.states[state_id as usize].borrow().output; - if output.0 != VALUE_INVALID { + if output.1 != LENGTH_INVALID { return Ok(()); } } @@ -130,7 +117,7 @@ where } let output = &mut self.states[state_id as usize].borrow_mut().output; - if output.0 != VALUE_INVALID { + if output.1 != LENGTH_INVALID { return Err(DaachorseError::duplicate_pattern(format!("{:?}", pattern))); } @@ -191,7 +178,7 @@ where let s = &mut self.states[state_id].borrow_mut(); // Sets the output state to the dead fail. - if s.output.0 != VALUE_INVALID { + if s.output.1 != LENGTH_INVALID { s.fail = DEAD_STATE_ID; } @@ -235,7 +222,7 @@ where for &state_id in q { let s = &mut self.states[state_id as usize].borrow_mut(); - if s.output.0 == VALUE_INVALID { + if s.output.1 == LENGTH_INVALID { s.output_pos = self.states[s.fail as usize].borrow().output_pos; continue; }