From a914340ca2430e6bd9eed656b02bf2d6d968666a Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Tue, 26 Dec 2023 23:43:17 +0100 Subject: [PATCH 1/3] feat: handle access to contiguous fragmented regions For some expressions, we want to be able to make them work even if regions are chunked. To do so, we need a new API to iterate on multiple distinct byte slices. This is done with the new Memory::on_range method. This uses a callback instead of returning an iterator to guarantee that only one region can be fetched at a given time, which is a constraint used in the memory trait. --- boreal/src/evaluator/read_integer.rs | 2 +- boreal/src/memory.rs | 146 +++++++++- boreal/src/module/hash.rs | 72 +++-- boreal/src/module/math.rs | 396 ++++++++++++++++++--------- boreal/tests/it/fragmented.rs | 63 ++++- 5 files changed, 499 insertions(+), 180 deletions(-) diff --git a/boreal/src/evaluator/read_integer.rs b/boreal/src/evaluator/read_integer.rs index 5827c9e7..09b2441a 100644 --- a/boreal/src/evaluator/read_integer.rs +++ b/boreal/src/evaluator/read_integer.rs @@ -26,7 +26,7 @@ pub(super) fn evaluate_read_integer( let mem = addr .checked_add(length) - .and_then(|end| evaluator.scan_data.mem.get(addr, end)) + .and_then(|end| evaluator.scan_data.mem.get_contiguous(addr, end)) .ok_or(PoisonKind::Undefined)?; match ty { diff --git a/boreal/src/memory.rs b/boreal/src/memory.rs index e2b00b3f..2b0ca041 100644 --- a/boreal/src/memory.rs +++ b/boreal/src/memory.rs @@ -74,14 +74,22 @@ impl Memory<'_> { /// Retrieve the data that matches the given range, potentially truncated. /// /// This will fetch the memory region containing this range and return the data slice - /// matching the exact range, possibly truncated. + /// matching the exact range. /// /// If the start does not belong to any memory region, None is returned. /// - /// If the end is after the end of the region, the slice is truncated. - // TODO: return an iterator if the range overlaps multiple regions that are contiguous? + /// This will only return data from the first memory chunk that intersects the provided + /// range, and only if this chunk contains the full range. For this reason, in almost + /// all use cases, it is recommended to use the [`Memory::on_range`] instead, which + /// will iterate properly on all chunks covering the provided range. + /// This function exists mostly for retrieval of very small byte slices, such as for + /// the `uintXX(offset)` expressions. #[must_use] - pub fn get(&mut self, start: usize, end: usize) -> Option<&[u8]> { + pub(crate) fn get_contiguous(&mut self, start: usize, end: usize) -> Option<&[u8]> { + if end < start { + return None; + } + match self { Self::Direct(mem) => { if start >= mem.len() { @@ -104,8 +112,7 @@ impl Memory<'_> { if relative_start >= region.length { continue; } - let end = end.checked_sub(region.start)?; - let end = std::cmp::min(region.length, end); + let end = std::cmp::min(region.length, end - region.start); let region = fragmented.obj.fetch(&fragmented.params)?; return region.mem.get(relative_start..end); @@ -115,6 +122,125 @@ impl Memory<'_> { } } } + + /// Call a callback on each data slice covering a given range. + /// + /// This will fetch the memory regions that contain data in this range, and call the + /// callback on each contiguous data slice. + /// + /// As soon as the range is entirely covered or contains indexes that are not covered by any + /// region, the iteration will end. + /// + /// In other words: + /// + /// - The first data slice starts on the starting index of the range. + /// - If the callback is called multiple times, each data slice is guaranteed to be exactly + /// following the previous one (no undefined bytes in between). + /// - The range may not be covered entirely, e.g. asking for `[0; 2*filesize[` will lead + /// to the callback being called once, on `[0; filesize[`. + /// + /// `None` is returned if either: + /// - the callback has not been called at least once (so no memory region contains the + /// starting bytes of the range). + /// - there are undefined bytes in between two regions covering the range. + /// - a region cannot be fetched. + /// + /// For example, when providing the range `[50; 100[`, and with regions: + /// + /// - `[0;70[` + /// - `[70; 80[` + /// - `[80; 150[` + /// + /// The callback will be called with `[50; 70[`, `[70; 80[` and `[80; 100[` then + /// `Some(())` will be returned. + /// + /// with regions: + /// + /// - `[0;70[` + /// - `[70; 80[` + /// - `[90; 150[` + /// + /// The callback will be called with `[50; 70[` and `[70; 80[`, then `None` will + /// be returned. + /// + /// with regions: + /// + /// - `[0;70[` + /// + /// The callback will be called with `[50; 70[` only, then `Some(())` will be + /// returned. + /// + /// And with regions: + /// + /// - `[60;70[` + /// + /// The callback will not be called at all, and `None` will be returned. + #[must_use] + pub fn on_range(&mut self, mut start: usize, end: usize, mut cb: F) -> Option<()> + where + F: FnMut(&[u8]), + { + if end < start { + return None; + } + + match self { + Self::Direct(mem) => { + if start >= mem.len() { + None + } else { + let end = std::cmp::min(mem.len(), end); + cb(&mem[start..end]); + Some(()) + } + } + Self::Fragmented(fragmented) => { + if !fragmented.params.can_refetch_regions { + return None; + } + + let mut has_called_cb = false; + + fragmented.obj.reset(); + while let Some(region) = fragmented.obj.next(&fragmented.params) { + // If we already called the callback once, the next regions should + // be contiguous. + if has_called_cb && start != region.start { + return None; + } + // Adjust starting offset relative to the region base + let Some(relative_start) = start.checked_sub(region.start) else { + break; + }; + if relative_start >= region.length { + continue; + } + + // Adjust ending offset relative to the region base and length + let relative_end = std::cmp::min(region.length, end - region.start); + + let Some(fetched_region) = fragmented.obj.fetch(&fragmented.params) else { + return None; + }; + cb(&fetched_region.mem[relative_start..relative_end]); + has_called_cb = true; + + // Update the starting offset for the next region. + start = region.start.checked_add(region.length)?; + if start >= end { + // Range has been entirely covered. + break; + } + } + + if has_called_cb { + Some(()) + } else { + None + } + } + } + } } /// Memory to scan, fragmented into different regions. @@ -221,4 +347,12 @@ mod tests { }, }); } + + #[test] + #[cfg_attr(coverage_nightly, coverage(off))] + fn test_proper_range() { + let mut mem = Memory::Direct(b"abc"); + assert_eq!(mem.get_contiguous(2, 1), None); + assert_eq!(mem.on_range(2, 1, |_| {}), None); + } } diff --git a/boreal/src/module/hash.rs b/boreal/src/module/hash.rs index caaf63c3..4d74fcb0 100644 --- a/boreal/src/module/hash.rs +++ b/boreal/src/module/hash.rs @@ -82,14 +82,25 @@ impl ModuleData for Hash { type Data = Data; } -fn compute_hash(bytes: &[u8]) -> Value { +fn compute_hash_from_bytes(bytes: &[u8]) -> Value { Value::bytes(hex::encode(D::digest(bytes))) } +fn compute_hash_from_mem( + ctx: &mut EvalContext, + offset: usize, + end: usize, +) -> Option { + let mut digest = D::new(); + + ctx.mem.on_range(offset, end, |data| digest.update(data))?; + Some(Value::bytes(hex::encode(digest.finalize()))) +} + impl Hash { fn md5(ctx: &mut EvalContext, args: Vec) -> Option { match get_args(args)? { - Args::Bytes(s) => Some(compute_hash::(&s)), + Args::Bytes(s) => Some(compute_hash_from_bytes::(&s)), Args::Range(offset, end) => { let data = ctx.module_data.get::()?; @@ -99,7 +110,7 @@ impl Hash { } } - let hash = compute_hash::(ctx.mem.get(offset, end)?); + let hash = compute_hash_from_mem::(ctx, offset, end)?; let _r = data .cache .write() @@ -113,7 +124,7 @@ impl Hash { fn sha1(ctx: &mut EvalContext, args: Vec) -> Option { match get_args(args)? { - Args::Bytes(s) => Some(compute_hash::(&s)), + Args::Bytes(s) => Some(compute_hash_from_bytes::(&s)), Args::Range(offset, end) => { let data = ctx.module_data.get::()?; @@ -123,7 +134,7 @@ impl Hash { } } - let hash = compute_hash::(ctx.mem.get(offset, end)?); + let hash = compute_hash_from_mem::(ctx, offset, end)?; let _r = data .cache .write() @@ -137,7 +148,7 @@ impl Hash { fn sha2(ctx: &mut EvalContext, args: Vec) -> Option { match get_args(args)? { - Args::Bytes(s) => Some(compute_hash::(&s)), + Args::Bytes(s) => Some(compute_hash_from_bytes::(&s)), Args::Range(offset, end) => { let data = ctx.module_data.get::()?; @@ -147,7 +158,7 @@ impl Hash { } } - let hash = compute_hash::(ctx.mem.get(offset, end)?); + let hash = compute_hash_from_mem::(ctx, offset, end)?; let _r = data .cache .write() @@ -160,19 +171,36 @@ impl Hash { } fn checksum32(ctx: &mut EvalContext, args: Vec) -> Option { - apply(ctx, args, |s| { - let checksum = s - .iter() - .fold(0u32, |acc, byte| acc.wrapping_add(u32::from(*byte))); - Value::Integer(i64::from(checksum)) - }) + let mut checksum: u32 = 0; + match get_args(args)? { + Args::Bytes(s) => { + for b in s { + checksum = checksum.wrapping_add(u32::from(b)); + } + } + Args::Range(offset, end) => { + ctx.mem.on_range(offset, end, |data| { + for b in data { + checksum = checksum.wrapping_add(u32::from(*b)); + } + })?; + } + } + Some(Value::Integer(i64::from(checksum))) } fn crc32(ctx: &mut EvalContext, args: Vec) -> Option { - apply(ctx, args, |s| { - let crc = crc32fast::hash(s); - Value::Integer(i64::from(crc)) - }) + match get_args(args)? { + Args::Bytes(s) => { + let crc = crc32fast::hash(&s); + Some(Value::Integer(i64::from(crc))) + } + Args::Range(offset, end) => { + let mut hasher = crc32fast::Hasher::new(); + ctx.mem.on_range(offset, end, |data| hasher.update(data))?; + Some(Value::Integer(i64::from(hasher.finalize()))) + } + } } } @@ -200,13 +228,3 @@ fn get_args(args: Vec) -> Option { _ => None, } } - -fn apply(ctx: &mut EvalContext, args: Vec, fun: F) -> Option -where - F: FnOnce(&[u8]) -> Value, -{ - match get_args(args)? { - Args::Bytes(s) => Some(fun(&s)), - Args::Range(offset, end) => Some(fun(ctx.mem.get(offset, end)?)), - } -} diff --git a/boreal/src/module/math.rs b/boreal/src/module/math.rs index 4c1feb0f..f71c16be 100644 --- a/boreal/src/module/math.rs +++ b/boreal/src/module/math.rs @@ -132,14 +132,6 @@ impl Module for Math { } } -fn get_mem_slice<'a>(ctx: &'a mut EvalContext, offset: i64, length: i64) -> Option<&'a [u8]> { - let start: usize = offset.try_into().ok()?; - let length: usize = length.try_into().ok()?; - let end = start.checked_add(length)?; - - ctx.mem.get(start, end) -} - impl Math { fn in_range(_ctx: &mut EvalContext, args: Vec) -> Option { let mut args = args.into_iter(); @@ -155,13 +147,15 @@ impl Math { let deviation = match args.next()? { Value::Bytes(bytes) => { let mean: f64 = args.next()?.try_into().ok()?; - compute_deviation(&bytes, mean) + compute_deviation(distribution_from_bytes(&bytes), mean) } Value::Integer(offset) => { let length: i64 = args.next()?.try_into().ok()?; let mean: f64 = args.next()?.try_into().ok()?; - compute_deviation(get_mem_slice(ctx, offset, length)?, mean) + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + compute_deviation(distribution(ctx, start, length)?, mean) } _ => return None, }; @@ -172,11 +166,11 @@ impl Math { fn mean(ctx: &mut EvalContext, args: Vec) -> Option { let mut args = args.into_iter(); let mean = match args.next()? { - Value::Bytes(bytes) => compute_mean(&bytes), + Value::Bytes(bytes) => compute_from_bytes(&bytes, Mean::new())?, Value::Integer(offset) => { let length: i64 = args.next()?.try_into().ok()?; - compute_mean(get_mem_slice(ctx, offset, length)?) + compute_from_mem(ctx, offset, length, Mean::new())? } _ => return None, }; @@ -187,11 +181,11 @@ impl Math { fn serial_correlation(ctx: &mut EvalContext, args: Vec) -> Option { let mut args = args.into_iter(); let scc = match args.next()? { - Value::Bytes(bytes) => compute_serial_correlation(&bytes), + Value::Bytes(bytes) => compute_from_bytes(&bytes, SerialCorrelation::new())?, Value::Integer(offset) => { let length: i64 = args.next()?.try_into().ok()?; - compute_serial_correlation(get_mem_slice(ctx, offset, length)?) + compute_from_mem(ctx, offset, length, SerialCorrelation::new())? } _ => return None, }; @@ -202,30 +196,33 @@ impl Math { fn monte_carlo_pi(ctx: &mut EvalContext, args: Vec) -> Option { let mut args = args.into_iter(); let mc = match args.next()? { - Value::Bytes(bytes) => compute_monte_carlo_pi(&bytes), + Value::Bytes(bytes) => compute_from_bytes(&bytes, MonteCarloPi::new())?, Value::Integer(offset) => { let length: i64 = args.next()?.try_into().ok()?; - compute_monte_carlo_pi(get_mem_slice(ctx, offset, length)?) + compute_from_mem(ctx, offset, length, MonteCarloPi::new())? } _ => return None, }; - mc.map(Value::Float) + Some(Value::Float(mc)) } fn entropy(ctx: &mut EvalContext, args: Vec) -> Option { let mut args = args.into_iter(); - let entropy = match args.next()? { - Value::Bytes(bytes) => compute_entropy(&bytes), + let distribution = match args.next()? { + Value::Bytes(bytes) => distribution_from_bytes(&bytes), Value::Integer(offset) => { let length: i64 = args.next()?.try_into().ok()?; - compute_entropy(get_mem_slice(ctx, offset, length)?) + + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + distribution(ctx, start, length)? } _ => return None, }; - Some(Value::Float(entropy)) + Some(Value::Float(compute_entropy(distribution))) } fn min(_ctx: &mut EvalContext, args: Vec) -> Option { @@ -294,16 +291,16 @@ impl Math { let dist = match (args.next(), args.next()) { (Some(Value::Integer(offset)), Some(Value::Integer(length))) => { - distribution(get_mem_slice(ctx, offset, length)?) + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + distribution(ctx, start, length)? } - (None, None) => match ctx.mem.get_direct() { - Some(mem) => distribution(mem), - None => return None, - }, + (None, None) => distribution_from_bytes(ctx.mem.get_direct()?), _ => return None, }; - dist.get(byte as usize) + dist.counters + .get(byte as usize) .and_then(|v| i64::try_from(*v).ok()) .map(Value::Integer) } @@ -315,19 +312,17 @@ impl Math { let dist = match (args.next(), args.next()) { (Some(Value::Integer(offset)), Some(Value::Integer(length))) => { - distribution(get_mem_slice(ctx, offset, length)?) + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + distribution(ctx, start, length)? } - (None, None) => match ctx.mem.get_direct() { - Some(mem) => distribution(mem), - None => return None, - }, + (None, None) => distribution_from_bytes(ctx.mem.get_direct()?), _ => return None, }; - let count = dist.get(byte)?; - let sum: u64 = dist.iter().sum(); + let count = dist.counters.get(byte)?; - Some(Value::Float((*count as f64) / (sum as f64))) + Some(Value::Float((*count as f64) / (dist.nb_values as f64))) } fn mode(ctx: &mut EvalContext, args: Vec) -> Option { @@ -335,18 +330,23 @@ impl Math { let dist = match (args.next(), args.next()) { (Some(Value::Integer(offset)), Some(Value::Integer(length))) => { - distribution(get_mem_slice(ctx, offset, length)?) + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + distribution(ctx, start, length)? } - (None, None) => match ctx.mem.get_direct() { - Some(mem) => distribution(mem), - None => return None, - }, + (None, None) => distribution_from_bytes(ctx.mem.get_direct()?), _ => return None, }; // Find the index of the most common byte // Reverse to return the first index of the maximum value and not the last one. - let most_common = dist.iter().enumerate().rev().max_by_key(|(_, n)| *n)?.0; + let most_common = dist + .counters + .iter() + .enumerate() + .rev() + .max_by_key(|(_, n)| *n)? + .0; most_common.try_into().ok().map(Value::Integer) } @@ -369,131 +369,257 @@ impl Math { } } -fn compute_mean(bytes: &[u8]) -> f64 { - let sum: u64 = bytes.iter().map(|v| u64::from(*v)).sum(); +trait MathDigest { + fn update(&mut self, data: &[u8]); + fn finalize(self) -> Option; +} + +struct Mean { + sum: u64, + nb: usize, +} + +impl Mean { + fn new() -> Self { + Self { sum: 0, nb: 0 } + } +} + +impl MathDigest for Mean { + fn update(&mut self, data: &[u8]) { + for b in data { + self.sum = self.sum.saturating_add(u64::from(*b)); + } + self.nb = self.nb.saturating_add(data.len()); + } + + fn finalize(self) -> Option { + Some((self.sum as f64) / (self.nb as f64)) + } +} + +// Algorithm can also be found here: +// https://github.com/Fourmilab/ent_random_sequence_tester/blob/master/src/randtest.c +// +// Basically, for a sequence of bytes [a0, a1, ..., aN]: +// +// scct1 = sum(a0 * a1 + a1 * a2 + ... + a(N-1) * aN + aN * a0) +// scct2 = sum(ax) ** 2 +// scct3 = sum(ax * ax) +// +// scc = (N*scct1 - scct2) / (N*scct3 - scct2) +struct SerialCorrelation { + scct1: f64, + scct2: f64, + scct3: f64, + prev: f64, + first: u8, + first_range: bool, + last: u8, + nb_values: usize, +} + +impl SerialCorrelation { + fn new() -> Self { + Self { + scct1: 0.0, + scct2: 0.0, + scct3: 0.0, + prev: 0.0, + first: 0, + first_range: true, + last: 0, + nb_values: 0, + } + } +} + +impl MathDigest for SerialCorrelation { + fn update(&mut self, data: &[u8]) { + if !data.is_empty() { + if self.first_range { + self.first_range = false; + self.first = data[0]; + } + self.last = data[data.len() - 1]; + } + for c in data { + let c = f64::from(*c); + self.scct1 += self.prev * c; + self.scct2 += c; + self.scct3 += c * c; + self.prev = c; + } + self.nb_values += data.len(); + } + + fn finalize(mut self) -> Option { + // Yes, this breaks the formula for len <= 2. But its how those implementations basically + // handle this... + if self.nb_values > 0 { + self.scct1 += f64::from(u32::from(self.first) * u32::from(self.last)); + } + self.scct2 *= self.scct2; + + let n = self.nb_values as f64; + let scc = n * self.scct3 - self.scct2; + Some(if scc == 0.0 { + -100_000.0 + } else { + (n * self.scct1 - self.scct2) / scc + }) + } +} - (sum as f64) / (bytes.len() as f64) +// Algorithm can also be found here: +// https://github.com/Fourmilab/ent_random_sequence_tester/blob/master/src/randtest.c +// +// As described here: +// +// > Each successive sequence of six bytes is used as 24 bit X and Y co-ordinates within a +// > square. If the distance of the randomly-generated point is less than the radius of a +// > circle inscribed within the square, the six-byte sequence is considered a “hit”. The +// > percentage of hits can be used to calculate the value of Pi. For very large streams +// > (this approximation converges very slowly), the value will approach the correct value of +// > Pi if the sequence is close to random. +struct MonteCarloPi { + inmount: u32, + mcount: u32, + incirc: f64, } -fn compute_deviation(bytes: &[u8], mean: f64) -> f64 { - let dist = distribution(bytes); - let sum: f64 = dist +const MONTEN: usize = 6; +const MONTEN_HALF: i32 = 3; + +impl MonteCarloPi { + fn new() -> Self { + Self { + inmount: 0, + mcount: 0, + incirc: (256.0_f64.powi(MONTEN_HALF) - 1.0).powi(2), + } + } +} + +impl MathDigest for MonteCarloPi { + fn update(&mut self, data: &[u8]) { + for w in data.chunks_exact(MONTEN) { + let mut mx = 0.0_f64; + let mut my = 0.0_f64; + + for j in 0..(MONTEN / 2) { + mx = (mx * 256.0) + f64::from(w[j]); + my = (my * 256.0) + f64::from(w[j + MONTEN / 2]); + } + + self.mcount += 1; + if (mx * mx + my * my) <= self.incirc { + self.inmount += 1; + } + } + } + + fn finalize(self) -> Option { + use std::f64::consts::PI; + + if self.mcount == 0 { + None + } else { + let mpi = 4.0 * f64::from(self.inmount) / f64::from(self.mcount); + Some(((mpi - PI) / PI).abs()) + } + } +} + +fn compute_from_bytes(data: &[u8], mut digest: T) -> Option { + digest.update(data); + digest.finalize() +} + +fn compute_from_mem( + ctx: &mut EvalContext, + offset: i64, + length: i64, + mut digest: T, +) -> Option { + let (start, end) = offset_length_to_start_end(offset, length)?; + ctx.mem.on_range(start, end, |data| digest.update(data))?; + digest.finalize() +} + +fn compute_deviation(distribution: Distribution, mean: f64) -> f64 { + let Distribution { + counters, + nb_values, + } = distribution; + let sum: f64 = counters .into_iter() .enumerate() .filter(|(_, n)| *n != 0) .map(|(c, n)| ((c as f64) - mean).abs() * (n as f64)) .sum(); - sum / (bytes.len() as f64) + sum / (nb_values as f64) } -fn compute_entropy(bytes: &[u8]) -> f64 { - let dist = distribution(bytes); +fn compute_entropy(distribution: Distribution) -> f64 { + let Distribution { + counters, + nb_values, + } = distribution; - let len = bytes.len() as f64; - dist.into_iter() + let nb_values = nb_values as f64; + counters + .into_iter() .filter(|n| *n != 0) .map(|n| { - let x = (n as f64) / len; + let x = (n as f64) / nb_values; -(x * x.log2()) }) .sum() } -fn compute_serial_correlation(bytes: &[u8]) -> f64 { - // Algorithm can also be found here: - // https://github.com/Fourmilab/ent_random_sequence_tester/blob/master/src/randtest.c - // - // Basically, for a sequence of bytes [a0, a1, ..., aN]: - // - // scct1 = sum(a0 * a1 + a1 * a2 + ... + a(N-1) * aN + aN * a0) - // scct2 = sum(ax) ** 2 - // scct3 = sum(ax * ax) - // - // scc = (N*scct1 - scct2) / (N*scct3 - scct2) - let mut scct1 = 0.0_f64; - let mut scct2 = 0.0_f64; - let mut scct3 = 0.0_f64; - let mut prev = 0.0_f64; - - for c in bytes { - let c = f64::from(*c); - scct1 += prev * c; - scct2 += c; - scct3 += c * c; - prev = c; - } - - // Yes, this breaks the formula for len <= 2. But its how those implementations basically - // handle this... - if !bytes.is_empty() { - scct1 += f64::from(u32::from(bytes[0]) * u32::from(bytes[bytes.len() - 1])); - } - scct2 *= scct2; - - let n = bytes.len() as f64; - let scc = n * scct3 - scct2; - if scc == 0.0 { - -100_000.0 - } else { - (n * scct1 - scct2) / scc - } +struct Distribution { + counters: Vec, + nb_values: usize, } -fn compute_monte_carlo_pi(bytes: &[u8]) -> Option { - // Algorithm can also be found here: - // https://github.com/Fourmilab/ent_random_sequence_tester/blob/master/src/randtest.c - // - // As described here: - // - // > Each successive sequence of six bytes is used as 24 bit X and Y co-ordinates within a - // > square. If the distance of the randomly-generated point is less than the radius of a - // > circle inscribed within the square, the six-byte sequence is considered a “hit”. The - // > percentage of hits can be used to calculate the value of Pi. For very large streams - // > (this approximation converges very slowly), the value will approach the correct value of - // > Pi if the sequence is close to random. - use std::f64::consts::PI; - - const MONTEN: usize = 6; - const MONTEN_HALF: i32 = 3; - - let incirc: f64 = (256.0_f64.powi(MONTEN_HALF) - 1.0).powi(2); - - let mut inmount = 0_u32; - let mut mcount = 0_u32; - - for w in bytes.chunks_exact(MONTEN) { - let mut mx = 0.0_f64; - let mut my = 0.0_f64; - - for j in 0..(MONTEN / 2) { - mx = (mx * 256.0) + f64::from(w[j]); - my = (my * 256.0) + f64::from(w[j + MONTEN / 2]); - } +fn distribution(ctx: &mut EvalContext, start: usize, length: usize) -> Option { + let mut distrib = Distribution { + counters: vec![0u64; 256], + nb_values: 0, + }; - mcount += 1; - if (mx * mx + my * my) <= incirc { - inmount += 1; + let end = start.checked_add(length)?; + ctx.mem.on_range(start, end, |bytes| { + for b in bytes { + distrib.counters[*b as usize] += 1; } - } + distrib.nb_values += bytes.len(); + })?; - if mcount == 0 { - None - } else { - let mpi = 4.0 * f64::from(inmount) / f64::from(mcount); - Some(((mpi - PI) / PI).abs()) - } + Some(distrib) } #[inline] -fn distribution(bytes: &[u8]) -> [u64; 256] { - let mut counters = [0u64; 256]; +fn distribution_from_bytes(bytes: &[u8]) -> Distribution { + let mut distrib = Distribution { + counters: vec![0u64; 256], + nb_values: bytes.len(), + }; for b in bytes { - counters[*b as usize] += 1; + distrib.counters[*b as usize] += 1; } - counters + distrib +} + +fn offset_length_to_start_end(offset: i64, length: i64) -> Option<(usize, usize)> { + let start: usize = offset.try_into().ok()?; + let length: usize = length.try_into().ok()?; + let end = start.checked_add(length)?; + Some((start, end)) } #[cfg(test)] diff --git a/boreal/tests/it/fragmented.rs b/boreal/tests/it/fragmented.rs index 75ffeb3d..5bd1f190 100644 --- a/boreal/tests/it/fragmented.rs +++ b/boreal/tests/it/fragmented.rs @@ -544,25 +544,66 @@ rule scanned_pe { #[test] fn test_fragmented_scan_mode_can_refetch_regions() { - let rule = r#" -rule refetched_region { + let mut checker1 = Checker::new_without_yara( + r#" +rule refetched_region1 { condition: uint8(1000) == 0x12 -} -"#; - let mut checker = Checker::new_without_yara(rule); +}"#, + ); + let mut checker2 = Checker::new_without_yara( + r#" +import "math" +rule refetched_region2 { + condition: math.mean(1000, 2) == 25.0 +}"#, + ); // Legacy mode: refetch regions - checker.check_fragmented(&[(1000, Some(b"\x12"))], true); + checker1.check_fragmented(&[(1000, Some(b"\x12\x20"))], true); + checker2.check_fragmented(&[(1000, Some(b"\x12")), (1001, Some(b"\x20"))], true); // Fast mode: do not refetch regions - checker.set_scan_params(ScanParams::default().fragmented_scan_mode(FragmentedScanMode::fast())); - checker.check_fragmented(&[(1000, Some(b"\x12"))], false); + let params = ScanParams::default().fragmented_scan_mode(FragmentedScanMode::fast()); + checker1.set_scan_params(params.clone()); + checker1.check_fragmented(&[(1000, Some(b"\x12"))], false); + checker2.set_scan_params(params); + checker2.check_fragmented(&[(1000, Some(b"\x12")), (1001, Some(b"\x20"))], false); // Single-pass mode: do not refetch regions - checker.set_scan_params( - ScanParams::default().fragmented_scan_mode(FragmentedScanMode::single_pass()), + let params = ScanParams::default().fragmented_scan_mode(FragmentedScanMode::single_pass()); + checker1.set_scan_params(params.clone()); + checker1.check_fragmented(&[(1000, Some(b"\x12"))], false); + checker2.set_scan_params(params); + checker2.check_fragmented(&[(1000, Some(b"\x12")), (1001, Some(b"\x20"))], false); +} + +#[test] +fn test_fragmented_failed_fetch() { + let mut checker = Checker::new( + r#" +rule a { + condition: defined uint8(1000) +}"#, + ); + + checker.check_fragmented(&[(1000, None)], false); + + let mut checker = Checker::new_without_yara( + r#" +import "math" +rule a { + condition: defined math.mean(1000, 2) +}"#, + ); + + checker.check_fragmented(&[(0, None), (1000, Some(b"\x12\x20"))], true); + checker.check_fragmented(&[(1000, None)], false); + checker.check_fragmented(&[(1000, Some(b"\x12")), (1001, None)], false); + // FIXME: yara fails this test, this is a bug. + checker.check_fragmented( + &[(1000, Some(b"\x12")), (1001, Some(b"\x20")), (1002, None)], + true, ); - checker.check_fragmented(&[(1000, Some(b"\x12"))], false); } #[test] From 3206e4c5f21c6b0eab3466f9601288cbe2a0b368 Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Sun, 31 Dec 2023 13:36:56 +0100 Subject: [PATCH 2/3] feat: remove truncating behavior of Memory::get_contiguous --- boreal/src/memory.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/boreal/src/memory.rs b/boreal/src/memory.rs index 2b0ca041..d1cfd9b1 100644 --- a/boreal/src/memory.rs +++ b/boreal/src/memory.rs @@ -95,7 +95,6 @@ impl Memory<'_> { if start >= mem.len() { None } else { - let end = std::cmp::min(mem.len(), end); mem.get(start..end) } } @@ -112,10 +111,9 @@ impl Memory<'_> { if relative_start >= region.length { continue; } - let end = std::cmp::min(region.length, end - region.start); let region = fragmented.obj.fetch(&fragmented.params)?; - return region.mem.get(relative_start..end); + return region.mem.get(relative_start..(end - region.start)); } None From a833bdda02994414ff03c230ecf2727b10897086 Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Sun, 31 Dec 2023 17:02:51 +0100 Subject: [PATCH 3/3] test: add tests on range access to fragmented memory --- boreal/tests/it/hash.rs | 215 +++++++++++++++++++++++--- boreal/tests/it/math.rs | 332 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 523 insertions(+), 24 deletions(-) diff --git a/boreal/tests/it/hash.rs b/boreal/tests/it/hash.rs index f2095f5c..e86cf60c 100644 --- a/boreal/tests/it/hash.rs +++ b/boreal/tests/it/hash.rs @@ -1,5 +1,7 @@ use crate::libyara_compat::util::TEXT_1024_BYTES; -use crate::utils::check; +use crate::utils::{check, Checker}; + +const TEXT: &[u8] = TEXT_1024_BYTES.as_bytes(); fn make_rule(cond: &str) -> String { format!( @@ -25,11 +27,11 @@ fn test_md5() { ); test( r#"hash.md5(0, filesize) == "dcc824971a00e589619ba0c0bba41515""#, - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( r#"hash.md5(50, 100) == "5c026f2a09609f79c46a7dab7398d4ac""#, - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( @@ -54,6 +56,43 @@ fn test_md5() { test(r#"not defined hash.md5(5, filesize)"#, b"a"); test(r#"not defined hash.md5(-1, filesize)"#, b"a"); test(r#"not defined hash.md5(0, -1)"#, b"a"); + + // Test that fragmented memory still works if chunks are contiguous + let mut checker = Checker::new(&make_rule( + r#"hash.md5(50, 100) == "5c026f2a09609f79c46a7dab7398d4ac""#, + )); + checker.check_fragmented(&[(0, Some(TEXT))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..150]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..]))], true); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..130])), + (130, Some(&TEXT[130..150])), + (150, Some(&TEXT[150..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + r#"hash.md5(50, 200) == "5c026f2a09609f79c46a7dab7398d4ac""#, + )); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..150])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined hash.md5(50, 100)")); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (51, Some(&TEXT[51..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (170, Some(&TEXT[170..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..70])), (80, Some(&TEXT[80..]))], true); } #[test] @@ -64,11 +103,11 @@ fn test_sha1() { ); test( r#"hash.sha1(0, filesize) == "ccb665bf4d6e19b56d3f70e9cc2837dfe3f3a745""#, - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( r#"hash.sha1(50, 100) == "1d17cf1bd2c85210e088796fe302d08beb27dd5a""#, - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( @@ -93,6 +132,43 @@ fn test_sha1() { test(r#"not defined hash.sha1(5, filesize)"#, b"a"); test(r#"not defined hash.sha1(-1, filesize)"#, b"a"); test(r#"not defined hash.sha1(0, -1)"#, b"a"); + + // Test that fragmented memory still works if chunks are contiguous + let mut checker = Checker::new(&make_rule( + r#"hash.sha1(50, 100) == "1d17cf1bd2c85210e088796fe302d08beb27dd5a""#, + )); + checker.check_fragmented(&[(0, Some(TEXT))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..150]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..]))], true); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..130])), + (130, Some(&TEXT[130..150])), + (150, Some(&TEXT[150..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + r#"hash.sha1(50, 200) == "1d17cf1bd2c85210e088796fe302d08beb27dd5a""#, + )); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..150])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined hash.sha1(50, 100)")); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (51, Some(&TEXT[51..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (170, Some(&TEXT[170..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..70])), (80, Some(&TEXT[80..]))], true); } #[test] @@ -105,12 +181,12 @@ fn test_sha256() { test( "hash.sha256(0, filesize) == \ \"62b33f9e7880055a0cb2f195e296f5c5f88043e08d5521199d1ae4f16df7b17b\"", - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( "hash.sha256(50, 100) == \ \"a8b65993e5cda9e8c6a93b8913062ae503df81cdebe0af070fd5ec3de4cf7dbf\"", - TEXT_1024_BYTES.as_bytes(), + TEXT, ); test( @@ -137,19 +213,52 @@ fn test_sha256() { test(r#"not defined hash.sha256(5, filesize)"#, b"a"); test(r#"not defined hash.sha256(-1, filesize)"#, b"a"); test(r#"not defined hash.sha256(0, -1)"#, b"a"); + + // Test that fragmented memory still works if chunks are contiguous + let mut checker = Checker::new(&make_rule( + "hash.sha256(50, 100) == \ + \"a8b65993e5cda9e8c6a93b8913062ae503df81cdebe0af070fd5ec3de4cf7dbf\"", + )); + checker.check_fragmented(&[(0, Some(TEXT))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..150]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..]))], true); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..130])), + (130, Some(&TEXT[130..150])), + (150, Some(&TEXT[150..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "hash.sha256(50, 200) == \ + \"a8b65993e5cda9e8c6a93b8913062ae503df81cdebe0af070fd5ec3de4cf7dbf\"", + )); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..150])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined hash.sha256(50, 100)")); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (51, Some(&TEXT[51..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (170, Some(&TEXT[170..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..70])), (80, Some(&TEXT[80..]))], true); } #[test] fn test_checksum32() { test("hash.checksum32(0, filesize) == 97", b"a"); - test( - "hash.checksum32(0, filesize) == 52946", - TEXT_1024_BYTES.as_bytes(), - ); - test( - "hash.checksum32(50, 100) == 5215", - TEXT_1024_BYTES.as_bytes(), - ); + test("hash.checksum32(0, filesize) == 52946", TEXT); + test("hash.checksum32(50, 100) == 5215", TEXT); test( "hash.checksum32(\"abcdefghijklmnopqrstuvwxyz\") == 2847", @@ -168,19 +277,46 @@ fn test_checksum32() { test(r#"not defined hash.checksum32(5, filesize)"#, b"a"); test(r#"not defined hash.checksum32(-1, filesize)"#, b"a"); test(r#"not defined hash.checksum32(0, -1)"#, b"a"); + + // Test that fragmented memory still works if chunks are contiguous + let mut checker = Checker::new(&make_rule("hash.checksum32(50, 100) == 5215")); + checker.check_fragmented(&[(0, Some(TEXT))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..150]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..]))], true); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..130])), + (130, Some(&TEXT[130..150])), + (150, Some(&TEXT[150..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule("hash.checksum32(50, 200) == 5215")); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..150])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined hash.checksum32(50, 100)")); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (51, Some(&TEXT[51..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (170, Some(&TEXT[170..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..70])), (80, Some(&TEXT[80..]))], true); } #[test] fn test_crc32() { test("hash.crc32(0, filesize) == 0xe8b7be43", b"a"); - test( - "hash.crc32(0, filesize) == 0x74cb171", - TEXT_1024_BYTES.as_bytes(), - ); - test( - "hash.crc32(50, 100) == 0x25c34eec", - TEXT_1024_BYTES.as_bytes(), - ); + test("hash.crc32(0, filesize) == 0x74cb171", TEXT); + test("hash.crc32(50, 100) == 0x25c34eec", TEXT); test( "hash.crc32(\"abcdefghijklmnopqrstuvwxyz\") == 0x4c2750bd", @@ -199,4 +335,37 @@ fn test_crc32() { test(r#"not defined hash.crc32(5, filesize)"#, b"a"); test(r#"not defined hash.crc32(-1, filesize)"#, b"a"); test(r#"not defined hash.crc32(0, -1)"#, b"a"); + + // Test that fragmented memory still works if chunks are contiguous + let mut checker = Checker::new(&make_rule("hash.crc32(50, 100) == 0x25c34eec")); + checker.check_fragmented(&[(0, Some(TEXT))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..150]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..75])), (75, Some(&TEXT[75..]))], true); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..130])), + (130, Some(&TEXT[130..150])), + (150, Some(&TEXT[150..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule("hash.crc32(50, 200) == 0x25c34eec")); + checker.check_fragmented( + &[ + (0, Some(&TEXT[0..50])), + (50, Some(&TEXT[50..70])), + (70, Some(&TEXT[70..150])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined hash.crc32(50, 100)")); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (51, Some(&TEXT[51..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..40])), (170, Some(&TEXT[170..]))], true); + checker.check_fragmented(&[(0, Some(&TEXT[0..70])), (80, Some(&TEXT[80..]))], true); } diff --git a/boreal/tests/it/math.rs b/boreal/tests/it/math.rs index 1b2d9216..07f994ca 100644 --- a/boreal/tests/it/math.rs +++ b/boreal/tests/it/math.rs @@ -1,6 +1,6 @@ use crate::{ libyara_compat::util::ELF32_MIPS_FILE, - utils::{check, check_err}, + utils::{check, check_err, Checker}, }; fn make_rule(cond: &str) -> String { @@ -94,6 +94,43 @@ fn test_math_mean() { "math.mean(1) == 1", "mem:5:25: error: invalid arguments types: [integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule("math.mean(150, 250) == 28.864")); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule("math.mean(150, 400) == 28.864")); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.mean(150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -128,6 +165,47 @@ fn test_math_serial_correlation() { "math.serial_correlation(1) == 1", "mem:5:39: error: invalid arguments types: [integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule( + "math.in_range(math.serial_correlation(150, 250), 0.12753, 0.12754)", + )); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "math.in_range(math.serial_correlation(150, 400), 0.12753, 0.12754)", + )); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.serial_correlation(150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -158,6 +236,47 @@ fn test_math_monte_carlo_pi() { "math.monte_carlo_pi(1) == 1", "mem:5:35: error: invalid arguments types: [integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule( + "math.in_range(math.monte_carlo_pi(150, 250), 0.27323, 0.27324)", + )); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "math.in_range(math.monte_carlo_pi(150, 400), 0.27323, 0.27324)", + )); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.monte_carlo_pi(150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -194,6 +313,47 @@ fn test_math_entropy() { "math.entropy(1) == 1", "mem:5:28: error: invalid arguments types: [integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule( + "math.in_range(math.entropy(150, 250), 2.70690, 2.70691)", + )); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "math.in_range(math.entropy(150, 400), 2.70690, 2.70691)", + )); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.entropy(150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -224,6 +384,49 @@ fn test_math_deviation() { "math.deviation(1, 0) == 1", "mem:5:30: error: invalid arguments types: [integer, integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule( + "math.deviation(150, 250, math.MEAN_BYTES) == 109.056", + )); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "math.deviation(150, 400, math.MEAN_BYTES) == 109.056", + )); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule( + "not defined math.deviation(150, 250, math.MEAN_BYTES)", + )); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -251,6 +454,43 @@ fn test_math_count() { "math.count(0.2) == 1", "mem:5:26: error: invalid arguments types: [floating-point number]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule("math.count(0, 150, 250) == 158")); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule("math.count(0, 150, 400) == 158")); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.count(0, 150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -294,6 +534,47 @@ fn test_math_percentage() { "math.percentage(true) == 1", "mem:5:31: error: invalid arguments types: [boolean]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule( + "math.in_range(math.percentage(0, 150, 250), 0.6319, 0.6321)", + )); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..200])), (200, Some(&file[200..400]))], + true, + ); + checker.check_fragmented(&[(0, Some(&file[0..200])), (200, Some(&file[200..]))], true); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..350])), + (350, Some(&file[350..400])), + (400, Some(&file[400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule( + "math.in_range(math.percentage(0, 150, 400), 0.6319, 0.6321)", + )); + checker.check_fragmented( + &[ + (0, Some(&file[0..150])), + (150, Some(&file[150..300])), + (300, Some(&file[300..400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.percentage(0, 150, 250)")); + checker.check_fragmented(&[(0, Some(&file[0..40])), (151, Some(&file[151..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (420, Some(&file[420..]))], true); + checker.check_fragmented(&[(0, Some(&file[0..200])), (220, Some(&file[220..]))], true); } #[test] @@ -318,6 +599,55 @@ fn test_math_mode() { "math.mode(0.2, 1) == 1", "mem:5:25: error: invalid arguments types: [floating-point number, integer]", ); + + // Test that fragmented memory still works if chunks are contiguous + let file = ELF32_MIPS_FILE; + let mut checker = Checker::new(&make_rule("math.mode(5150, 250) == 45")); + checker.check_fragmented(&[(0, Some(file))], true); + checker.check_fragmented( + &[(0, Some(&file[0..5200])), (5200, Some(&file[5200..5400]))], + true, + ); + checker.check_fragmented( + &[(0, Some(&file[0..5200])), (5200, Some(&file[5200..]))], + true, + ); + checker.check_fragmented( + &[ + (0, Some(&file[0..5150])), + (5150, Some(&file[5150..5300])), + (5300, Some(&file[5300..5350])), + (5350, Some(&file[5350..5400])), + (5400, Some(&file[5400..])), + ], + true, + ); + + // Will still return a result if last region truncates the range + let mut checker = Checker::new(&make_rule("math.mode(5150, 400) == 45")); + checker.check_fragmented( + &[ + (0, Some(&file[0..5150])), + (5150, Some(&file[5150..5300])), + (5300, Some(&file[5300..5400])), + ], + true, + ); + + // Missing starting bytes of holes means undefined + let mut checker = Checker::new(&make_rule("not defined math.mode(5150, 250)")); + checker.check_fragmented( + &[(0, Some(&file[0..5040])), (5151, Some(&file[5151..]))], + true, + ); + checker.check_fragmented( + &[(0, Some(&file[0..5200])), (5420, Some(&file[5420..]))], + true, + ); + checker.check_fragmented( + &[(0, Some(&file[0..5200])), (5220, Some(&file[5220..]))], + true, + ); } #[test]