From 5e0cbda5998046b641f8ec9f3fd3a698f0417e5d Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Sat, 6 Jul 2024 16:11:54 +0200 Subject: [PATCH] feat: add compiler profile to prioritize memory usage Add a compiler profile option to CompilerBuilder. This will allow picking between: - speed: prioritize scanning speed (current impl & default) - memory: prioritize memory usage - automatic: pick between the two depending on the size of the rules (not implemented yet). The memory profile is only implemented for the Aho-Corasick matcher for the moment. Changing the regex engine used for validators could be an option; however, it isn't trivial to do, and it would make implementing an "automatic" profile harder, since "automatic" only works if compilation is done when converting the compiler to the scanner. --- benches/src/bench.rs | 4 +++- boreal/src/compiler/builder.rs | 29 ++++++++++++++++++++++++++++- boreal/src/compiler/mod.rs | 31 ++++++++++++++++++++++++++++++- boreal/src/compiler/tests.rs | 3 ++- boreal/src/scanner/ac_scan.rs | 13 ++++++++----- boreal/src/scanner/mod.rs | 3 ++- 6 files changed, 73 insertions(+), 10 deletions(-) diff --git a/benches/src/bench.rs b/benches/src/bench.rs index 9e0fb463..301023d3 100644 --- a/benches/src/bench.rs +++ b/benches/src/bench.rs @@ -113,7 +113,9 @@ fn bench_scan_process(c: &mut Criterion) { } fn build_boreal_compiler() -> boreal::Compiler { - let mut boreal_compiler = boreal::Compiler::new(); + let mut boreal_compiler = boreal::compiler::CompilerBuilder::new() + .profile(boreal::compiler::CompilerProfile::Speed) + .build(); let _ = boreal_compiler.define_symbol("owner", "owner"); let _ = boreal_compiler.define_symbol("filename", "filename"); let _ = boreal_compiler.define_symbol("filepath", "filepath"); diff --git a/boreal/src/compiler/builder.rs b/boreal/src/compiler/builder.rs index aad36af1..364230cc 100644 --- a/boreal/src/compiler/builder.rs +++ b/boreal/src/compiler/builder.rs @@ -7,6 +7,9 @@ use super::{AvailableModule, ModuleLocation}; pub
struct CompilerBuilder { /// Modules that can be imported when compiling rules. modules: HashMap<&'static str, AvailableModule>, + + /// Profile to use when compiling rules. + profile: super::CompilerProfile, } impl CompilerBuilder { @@ -75,20 +78,44 @@ impl CompilerBuilder { self } + /// Set the profile to use when compiling rules. + /// + /// By default, [`CompilerProfile::Speed`] is used. + #[must_use] + pub fn profile(mut self, profile: super::CompilerProfile) -> Self { + self.profile = profile; + self + } + /// Build a [`Compiler`] object with the configuration set on this builder. #[must_use] pub fn build(self) -> super::Compiler { - super::Compiler::build(self.modules) + super::Compiler::build(self.modules, self.profile) + } + + /// Get the profile to use when compiling rules. + #[must_use] + pub fn get_profile(&self) -> super::CompilerProfile { + self.profile } } #[cfg(test)] mod tests { use super::*; + use crate::compiler::CompilerProfile; use crate::test_helpers::test_type_traits_non_clonable; #[test] fn test_types_traits() { test_type_traits_non_clonable(CompilerBuilder::default()); } + + #[test] + fn test_getters() { + let builder = CompilerBuilder::default(); + + let builder = builder.profile(CompilerProfile::Memory); + assert_eq!(builder.get_profile(), CompilerProfile::Memory); + } } diff --git a/boreal/src/compiler/mod.rs b/boreal/src/compiler/mod.rs index d9baf247..296dd629 100644 --- a/boreal/src/compiler/mod.rs +++ b/boreal/src/compiler/mod.rs @@ -69,6 +69,9 @@ pub struct Compiler { /// Compilation parameters params: CompilerParams, + + /// Profile to use when compiling rules. + pub(crate) profile: CompilerProfile, } #[derive(Debug)] @@ -98,6 +101,27 @@ struct ImportedModule { module_index: usize, } +/// Profile to use when compiling rules. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum CompilerProfile { + /// Prioritize scan speed. 
+ /// + /// This profile will strive to get the best possible scan speed by using more memory + /// when possible. + Speed, + /// Prioritize memory usage + /// + /// This profile will strive to reduce memory usage as much as possible, even if it means + /// a slower scan speed overall. + Memory, +} + +impl Default for CompilerProfile { + fn default() -> Self { + Self::Speed + } +} + impl Default for Compiler { fn default() -> Self { Self { @@ -113,6 +137,7 @@ impl Default for Compiler { external_symbols: Vec::new(), bytes_pool: BytesPoolBuilder::default(), params: CompilerParams::default(), + profile: CompilerProfile::default(), } } } @@ -143,9 +168,13 @@ impl Compiler { /// /// Returns false if a module with the same name is already registered, and the module /// was not added. - fn build(available_modules: HashMap<&'static str, AvailableModule>) -> Self { + fn build( + available_modules: HashMap<&'static str, AvailableModule>, + profile: CompilerProfile, + ) -> Self { Self { available_modules, + profile, ..Default::default() } } diff --git a/boreal/src/compiler/tests.rs b/boreal/src/compiler/tests.rs index 0a6d03c2..6d9c23d9 100644 --- a/boreal/src/compiler/tests.rs +++ b/boreal/src/compiler/tests.rs @@ -5,7 +5,7 @@ use super::module::compile_module; use super::rule::RuleCompiler; use super::{ AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler, - CompilerParams, ImportedModule, ModuleLocation, Namespace, + CompilerParams, CompilerProfile, ImportedModule, ModuleLocation, Namespace, }; use crate::bytes_pool::BytesPoolBuilder; use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable}; @@ -289,6 +289,7 @@ fn test_types_traits() { }), }); test_type_traits(CompilerParams::default()); + test_type_traits(CompilerProfile::default()); test_type_traits_non_clonable(AddRuleStatus { warnings: Vec::new(), statistics: Vec::new(), diff --git a/boreal/src/scanner/ac_scan.rs b/boreal/src/scanner/ac_scan.rs index 
baf741d8..b55a9137 100644 --- a/boreal/src/scanner/ac_scan.rs +++ b/boreal/src/scanner/ac_scan.rs @@ -7,6 +7,7 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind}; use super::{ScanError, ScanParams, StringMatch}; use crate::atoms::pick_atom_in_literal; use crate::compiler::variable::Variable; +use crate::compiler::CompilerProfile; use crate::matcher::{AcMatchStatus, Matcher}; use crate::memory::Region; @@ -79,7 +80,7 @@ impl ScanData<'_> { } impl AcScan { - pub(crate) fn new(variables: &[Variable]) -> Self { + pub(crate) fn new(variables: &[Variable], profile: CompilerProfile) -> Self { let mut lits = Vec::new(); let mut known_lits = HashMap::new(); let mut aho_index_to_literal_info = Vec::new(); @@ -128,9 +129,11 @@ impl AcScan { // optimizations are done. let mut builder = AhoCorasickBuilder::new(); - let builder = builder - .ascii_case_insensitive(true) - .kind(Some(AhoCorasickKind::DFA)); + let builder = builder.ascii_case_insensitive(true); + let builder = builder.kind(Some(match profile { + CompilerProfile::Speed => AhoCorasickKind::DFA, + CompilerProfile::Memory => AhoCorasickKind::ContiguousNFA, + })); // First try with a smaller size to reduce memory use and improve performances, otherwise // use the default version. @@ -312,7 +315,7 @@ mod tests { #[test] fn test_types_traits() { - test_type_traits_non_clonable(AcScan::new(&[])); + test_type_traits_non_clonable(AcScan::new(&[], CompilerProfile::Speed)); test_type_traits_non_clonable(LiteralInfo { variable_index: 0, literal_index: 0, diff --git a/boreal/src/scanner/mod.rs b/boreal/src/scanner/mod.rs index 64f233a0..d9309123 100644 --- a/boreal/src/scanner/mod.rs +++ b/boreal/src/scanner/mod.rs @@ -110,11 +110,12 @@ impl Scanner { imported_modules, external_symbols, bytes_pool, + profile, .. 
} = compiler; let namespaces = namespaces.into_iter().map(|v| v.name).collect(); - let ac_scan = ac_scan::AcScan::new(&variables); + let ac_scan = ac_scan::AcScan::new(&variables, profile); let mut external_symbols_values = Vec::new(); let mut external_symbols_map = HashMap::new();