Skip to content

Commit 25ba0ca

Browse files
committed
feat: add bytes from expressions into bytes pool
1 parent f9e7907 commit 25ba0ca

File tree

7 files changed

+57
-25
lines changed

7 files changed

+57
-25
lines changed

boreal/src/compiler/expression.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ use super::rule::RuleCompiler;
1010
use super::{module, CompilationError};
1111
use crate::module::Type as ModuleType;
1212
use crate::regex::{regex_ast_to_hir, regex_hir_to_string, Regex};
13+
use crate::BytesSymbol;
1314

1415
/// Type of a parsed expression
1516
///
@@ -351,8 +352,8 @@ pub enum Expression {
351352
/// The value is the index into the external symbols vector stored in the compiled rules.
352353
ExternalSymbol(usize),
353354

354-
/// A byte string.
355-
Bytes(Vec<u8>),
355+
/// An interned byte string.
356+
Bytes(BytesSymbol),
356357

357358
/// A regex.
358359
Regex(Regex),
@@ -876,7 +877,7 @@ pub(super) fn compile_expression(
876877
Ok(Expr { expr, ty, span })
877878
}
878879
parser::ExpressionKind::Bytes(s) => Ok(Expr {
879-
expr: Expression::Bytes(s),
880+
expr: Expression::Bytes(compiler.bytes_pool.insert(&s)),
880881
ty: Type::Bytes,
881882
span,
882883
}),

boreal/src/compiler/module.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -347,7 +347,7 @@ impl ModuleUse<'_, '_> {
347347
// we can directly generate a primitive expression.
348348
StaticValue::Integer(v) => Expression::Integer(*v),
349349
StaticValue::Float(v) => Expression::Double(*v),
350-
StaticValue::Bytes(v) => Expression::Bytes(v.clone()),
350+
StaticValue::Bytes(v) => Expression::Bytes(self.compiler.bytes_pool.insert(&v)),
351351
StaticValue::Boolean(v) => Expression::Boolean(*v),
352352

353353
StaticValue::Object(_) => return None,

boreal/src/compiler/rule.rs

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,7 @@ use boreal_parser::rule;
88
use super::expression::{compile_bool_expression, Expression, VariableIndex};
99
use super::external_symbol::ExternalSymbol;
1010
use super::{variable, CompilationError, CompilerParams, Namespace};
11-
use crate::bytes_pool::BytesPoolBuilder;
12-
use crate::bytes_pool::BytesSymbol;
13-
use crate::bytes_pool::StringSymbol;
11+
use crate::bytes_pool::{BytesPoolBuilder, BytesSymbol, StringSymbol};
1412
use crate::module::Type as ModuleType;
1513
use crate::statistics;
1614

@@ -105,6 +103,9 @@ pub(super) struct RuleCompiler<'a> {
105103

106104
/// Warnings emitted while compiling the rule.
107105
pub warnings: Vec<CompilationError>,
106+
107+
/// Bytes intern pool.
108+
pub bytes_pool: &'a mut BytesPoolBuilder,
108109
}
109110

110111
/// Helper struct used to track variables being compiled in a rule.
@@ -126,6 +127,7 @@ impl<'a> RuleCompiler<'a> {
126127
namespace: &'a Namespace,
127128
external_symbols: &'a Vec<ExternalSymbol>,
128129
params: &'a CompilerParams,
130+
bytes_pool: &'a mut BytesPoolBuilder,
129131
) -> Result<Self, CompilationError> {
130132
let mut names_set = HashSet::new();
131133
let mut variables = Vec::with_capacity(rule_variables.len());
@@ -153,6 +155,7 @@ impl<'a> RuleCompiler<'a> {
153155
params,
154156
condition_depth: 0,
155157
warnings: Vec::new(),
158+
bytes_pool,
156159
})
157160
}
158161

@@ -249,7 +252,26 @@ pub(super) fn compile_rule(
249252
}
250253
}
251254

252-
let mut compiler = RuleCompiler::new(&rule.variables, namespace, external_symbols, params)?;
255+
let metadatas: Vec<_> = rule
256+
.metadatas
257+
.into_iter()
258+
.map(|rule::Metadata { name, value }| Metadata {
259+
name: bytes_pool.insert_str(&name),
260+
value: match value {
261+
rule::MetadataValue::Bytes(v) => MetadataValue::Bytes(bytes_pool.insert(&v)),
262+
rule::MetadataValue::Integer(v) => MetadataValue::Integer(v),
263+
rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v),
264+
},
265+
})
266+
.collect();
267+
268+
let mut compiler = RuleCompiler::new(
269+
&rule.variables,
270+
namespace,
271+
external_symbols,
272+
params,
273+
bytes_pool,
274+
)?;
253275
let condition = compile_bool_expression(&mut compiler, rule.condition)?;
254276

255277
let mut variables = Vec::with_capacity(rule.variables.len());
@@ -275,20 +297,7 @@ pub(super) fn compile_rule(
275297
name: rule.name,
276298
namespace_index,
277299
tags: rule.tags.into_iter().map(|v| v.tag).collect(),
278-
metadatas: rule
279-
.metadatas
280-
.into_iter()
281-
.map(|rule::Metadata { name, value }| Metadata {
282-
name: bytes_pool.insert_str(&name),
283-
value: match value {
284-
rule::MetadataValue::Bytes(v) => {
285-
MetadataValue::Bytes(bytes_pool.insert(&v))
286-
}
287-
rule::MetadataValue::Integer(v) => MetadataValue::Integer(v),
288-
rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v),
289-
},
290-
})
291-
.collect(),
300+
metadatas,
292301
nb_variables: variables.len(),
293302
condition,
294303
is_private: rule.is_private,
@@ -326,6 +335,7 @@ mod tests {
326335
params: &CompilerParams::default(),
327336
condition_depth: 0,
328337
warnings: Vec::new(),
338+
bytes_pool: &mut BytesPoolBuilder::default(),
329339
});
330340
let build_rule = || Rule {
331341
name: "a".to_owned(),

boreal/src/compiler/tests.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ use super::{
77
AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler,
88
CompilerParams, ImportedModule, ModuleLocation, Namespace,
99
};
10+
use crate::bytes_pool::BytesPoolBuilder;
1011
use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable};
1112
use boreal_parser::parse;
1213

@@ -30,12 +31,14 @@ fn compile_expr(expression_str: &str, expected_type: Type) {
3031
assert!(compiler.define_symbol("sym_bool", true));
3132
assert!(compiler.define_symbol("sym_bytes", "keyboard"));
3233

34+
let mut bytes_pool = BytesPoolBuilder::default();
3335
let ns = Namespace::default();
3436
let mut rule_compiler = RuleCompiler::new(
3537
&rule.variables,
3638
&ns,
3739
&compiler.external_symbols,
3840
&compiler.params,
41+
&mut bytes_pool,
3942
)
4043
.unwrap();
4144
let res = compile_expression(&mut rule_compiler, rule.condition).unwrap();

boreal/src/compiler/variable.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ mod tests {
154154
use boreal_parser::rule::VariableModifiers;
155155

156156
use super::*;
157+
use crate::bytes_pool::BytesPoolBuilder;
157158
use crate::compiler::{CompilerParams, Namespace};
158159
use crate::regex::Regex;
159160
use crate::test_helpers::test_type_traits_non_clonable;
@@ -169,6 +170,7 @@ mod tests {
169170
params: &CompilerParams::default(),
170171
condition_depth: 0,
171172
warnings: Vec::new(),
173+
bytes_pool: &mut BytesPoolBuilder::default(),
172174
};
173175
test_type_traits_non_clonable(
174176
compile_variable(

boreal/src/evaluator/mod.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
//! - `defined`
3636
//!
3737
//! For all of those, an undefined value is considered to be equivalent to a false boolean value.
38+
use crate::bytes_pool::BytesPool;
3839
use crate::compiler::expression::{Expression, ForIterator, ForSelection, VariableIndex};
3940
use crate::compiler::rule::Rule;
4041
#[cfg(feature = "object")]
@@ -112,13 +113,15 @@ pub(crate) fn evaluate_rule<'scan>(
112113
rule: &Rule,
113114
var_matches: Option<&'scan [Vec<variable::StringMatch>]>,
114115
previous_rules_results: &'scan [bool],
116+
bytes_pool: &'scan BytesPool,
115117
scan_data: &'scan mut ScanData,
116118
) -> Result<bool, EvalError> {
117119
let mut evaluator = Evaluator {
118120
var_matches: var_matches.map(variable::VarMatches::new),
119121
previous_rules_results,
120122
currently_selected_variable_index: None,
121123
bounded_identifiers_stack: Vec::new(),
124+
bytes_pool,
122125
scan_data,
123126
};
124127
match evaluator.evaluate_expr(&rule.condition) {
@@ -145,6 +148,9 @@ struct Evaluator<'scan, 'rule, 'mem> {
145148
// Stack of bounded identifiers to their integer values.
146149
bounded_identifiers_stack: Vec<ModuleValue>,
147150

151+
// Bytes intern pool, used to resolve expressions that stored bytes in the pool.
152+
bytes_pool: &'rule BytesPool,
153+
148154
// Data related only to the scan, independent of the rule.
149155
scan_data: &'rule mut ScanData<'scan, 'mem>,
150156
}
@@ -679,7 +685,7 @@ impl Evaluator<'_, '_, '_> {
679685

680686
Expression::Integer(v) => Ok(Value::Integer(*v)),
681687
Expression::Double(v) => Ok(Value::Float(*v)),
682-
Expression::Bytes(v) => Ok(Value::Bytes(v.clone())),
688+
Expression::Bytes(v) => Ok(Value::Bytes(self.bytes_pool.get(*v).to_vec())),
683689
Expression::Regex(v) => Ok(Value::Regex(v.clone())),
684690
Expression::Boolean(v) => Ok(Value::Boolean(*v)),
685691
}

boreal/src/scanner/mod.rs

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -709,6 +709,7 @@ impl EvalContext {
709709
rule,
710710
var_matches.as_deref(),
711711
&self.previous_results,
712+
&scanner.bytes_pool,
712713
scan_data,
713714
)?;
714715

@@ -989,13 +990,22 @@ mod tests {
989990
let mut previous_results = Vec::new();
990991
let rules = &scanner.inner.rules;
991992
for rule in &rules[..(rules.len() - 1)] {
992-
previous_results
993-
.push(evaluate_rule(rule, None, &previous_results, &mut scan_data).unwrap());
993+
previous_results.push(
994+
evaluate_rule(
995+
rule,
996+
None,
997+
&previous_results,
998+
&scanner.inner.bytes_pool,
999+
&mut scan_data,
1000+
)
1001+
.unwrap(),
1002+
);
9941003
}
9951004
let last_res = evaluate_rule(
9961005
&rules[rules.len() - 1],
9971006
None,
9981007
&previous_results,
1008+
&scanner.inner.bytes_pool,
9991009
&mut scan_data,
10001010
);
10011011

0 commit comments

Comments
 (0)