From 4f3833415419cf91a94a011fcd7d3a7ac6a07161 Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Wed, 29 Nov 2023 22:12:00 +0100 Subject: [PATCH] test: add test on process scan with param memory_chunk_size --- boreal-test-helpers/src/main.rs | 72 ++++++++++---- boreal/src/scanner/process/sys/linux.rs | 49 +++++++-- boreal/src/scanner/process/sys/windows.rs | 116 ++++++++++++++-------- boreal/tests/it/process.rs | 59 +++++++++++ 4 files changed, 228 insertions(+), 68 deletions(-) diff --git a/boreal-test-helpers/src/main.rs b/boreal-test-helpers/src/main.rs index c3b2df33..c9361dd5 100644 --- a/boreal-test-helpers/src/main.rs +++ b/boreal-test-helpers/src/main.rs @@ -14,13 +14,14 @@ fn main() { match &*arg { "stack" => stack(), "max_fetched_region_size" => max_fetched_region_size(), + "memory_chunk_size" => memory_chunk_size(), _ => panic!("unknown arg {}", arg), } } struct Region { _file: tempfile::NamedTempFile, - map: memmap2::Mmap, + map: memmap2::MmapMut, } impl Region { @@ -47,19 +48,19 @@ fn max_fetched_region_size() { // One page will contain the whole string. // This is "Dwb6r5gd" - let region1 = allocate_region(b"Kxm9}:hk"); + let region1 = Region::new(b"Kxm9}:hk"); // This one will still match, since it is exactly 20 bytes // This is "123456789 Dwb6r5gd" - let region2 = allocate_region(b">=<;:9876/Kxm9}:hk"); + let region2 = Region::new(b">=<;:9876/Kxm9}:hk"); // This one will not match as it gets cut // This is "123456789 12345 Dwb6r5gd" - let region3 = allocate_region(b">=<;:9876/>=<;:/Kxm9}:hk"); + let region3 = Region::new(b">=<;:9876/>=<;:/Kxm9}:hk"); // Past the limit so will not get matched // This is "123456789 123456789 12345 Dwb6r5gd" - let region4 = allocate_region(b">=<;:9876/>=<;:9876/>=<;:/Kxm9}:hk"); + let region4 = Region::new(b">=<;:9876/>=<;:9876/>=<;:/Kxm9}:hk"); // Send the base addresses of the region back to the test println!("{:x}", region1.addr()); @@ -71,24 +72,61 @@ fn max_fetched_region_size() { std::thread::sleep(std::time::Duration::from_secs(500)); } -fn allocate_region(contents: &[u8]) -> Region { - // Create a file, write the xored content into it, and mmap it. - // Why a file instead of an anonymous mapping? It ensures each region is separate - // in the proc maps file, instead of part of the same region. - let mut file = tempfile::NamedTempFile::new().unwrap(); - xor_bytes_into(contents, 15, file.as_file_mut()); - let map = unsafe { memmap2::Mmap::map(file.as_file()).unwrap() }; +fn memory_chunk_size() { + // The searched string is "T5aI0uhg7S", and the chunk size is 10MB + let tenmb = 10 * 1024 * 1024; - Region { _file: file, map } + // One page will contain the string, right at the end. + let mut region1 = Region::zeroed(tenmb); + region1.write_at(tenmb - 10, b"[:nF?zgh8\\"); + + // One page will split the string in two + let mut region2 = Region::zeroed(tenmb + 20); + region2.write_at(tenmb - 5, b"[:nF?zgh8\\"); + + // One page will contain the string, twice, in two separate chunks + let mut region3 = Region::zeroed(tenmb * 3); + // First one is right at the 15MB limit + region3.write_at(tenmb + 5 * 1024 * 1024 - 5, b"[:nF?zgh8\\"); + // Second one is after 20MB + region3.write_at(2 * tenmb + 4096, b"[:nF?zgh8\\"); + + // Send the base addresses of the region back to the test + println!("{:x}", region1.addr()); + println!("{:x}", region2.addr()); + println!("{:x}", region3.addr()); + + println!("ready"); + std::thread::sleep(std::time::Duration::from_secs(500)); +} + +impl Region { + fn new(contents: &[u8]) -> Self { + let mut this = Self::zeroed(contents.len()); + this.write_at(0, contents); + this + } + + fn zeroed(size: usize) -> Self { + let mut file = tempfile::NamedTempFile::new().unwrap(); + let contents = vec![0; size]; + file.write_all(&contents).unwrap(); + let map = unsafe { memmap2::MmapMut::map_mut(file.as_file()).unwrap() }; + + Self { _file: file, map } + } + + fn write_at(&mut self, offset: usize, payload: &[u8]) { + xor_bytes_into(payload, 15, &mut self.map[offset..(offset + payload.len())]); + } } fn xor_bytes(v: &[u8], xor_byte: u8) -> Vec { v.iter().map(|b| *b ^ xor_byte).collect() } -fn xor_bytes_into(v: &[u8], xor_byte: u8, f: &mut std::fs::File) { - for b in v { - f.write_all(&[*b ^ xor_byte]).unwrap(); +fn xor_bytes_into(v: &[u8], xor_byte: u8, dest: &mut [u8]) { + for (v, d) in v.iter().zip(dest.iter_mut()) { + *d = *v ^ xor_byte; } - f.flush().unwrap(); } diff --git a/boreal/src/scanner/process/sys/linux.rs b/boreal/src/scanner/process/sys/linux.rs index 89b78876..1b89d228 100644 --- a/boreal/src/scanner/process/sys/linux.rs +++ b/boreal/src/scanner/process/sys/linux.rs @@ -18,6 +18,7 @@ pub fn process_memory(pid: u32) -> Result, ScanError> maps_file: BufReader::new(maps_file), mem_file, buffer: Vec::new(), + current_position: None, region: None, })) } @@ -65,18 +66,29 @@ struct LinuxProcessMemory { // Buffer used to hold the duplicated process memory when fetched. buffer: Vec, - // Current region. + // Current position: current region and offset in the region of the current chunk. + current_position: Option<(RegionDescription, usize)>, + + // Current region returned by the next call, which needs to be fetched. region: Option, } -impl FragmentedMemory for LinuxProcessMemory { - fn reset(&mut self) { - let _ = self.maps_file.rewind(); - } +impl LinuxProcessMemory { + fn next_position(&mut self, params: &MemoryParams) { + if let Some(chunk_size) = params.memory_chunk_size { + if let Some((desc, mut offset)) = self.current_position { + offset = offset.saturating_add(chunk_size); + if offset < desc.length { + // Region has a next chunk, so simply select it. + self.current_position = Some((desc, offset)); + return; + } + } + } - fn next(&mut self, _params: &MemoryParams) -> Option { + // Otherwise, read the next line from the maps file let mut line = String::new(); - self.region = loop { + self.current_position = loop { line.clear(); if self.maps_file.read_line(&mut line).is_err() { break None; @@ -85,9 +97,29 @@ impl FragmentedMemory for LinuxProcessMemory { break None; } if let Some(desc) = parse_map_line(&line) { - break Some(desc); + break Some((desc, 0)); } }; + } +} + +impl FragmentedMemory for LinuxProcessMemory { + fn reset(&mut self) { + let _ = self.maps_file.rewind(); + } + + fn next(&mut self, params: &MemoryParams) -> Option { + self.next_position(params); + + self.region = self + .current_position + .map(|(desc, offset)| match params.memory_chunk_size { + Some(chunk_size) => RegionDescription { + start: desc.start.saturating_add(offset), + length: std::cmp::min(chunk_size, desc.length), + }, + None => desc, + }); self.region } @@ -99,6 +131,7 @@ impl FragmentedMemory for LinuxProcessMemory { .ok()?; let length = std::cmp::min(desc.length, params.max_fetched_region_size); + self.buffer.resize(length, 0); self.mem_file.read_exact(&mut self.buffer).ok()?; diff --git a/boreal/src/scanner/process/sys/windows.rs b/boreal/src/scanner/process/sys/windows.rs index 543e6861..21de31f2 100644 --- a/boreal/src/scanner/process/sys/windows.rs +++ b/boreal/src/scanner/process/sys/windows.rs @@ -41,6 +41,7 @@ pub fn process_memory(pid: u32) -> Result, ScanError> Ok(Box::new(WindowsProcessMemory { handle, buffer: Vec::new(), + current_position: None, region: None, })) } @@ -53,57 +54,86 @@ struct WindowsProcessMemory { // Buffer used to hold the duplicated process memory when fetched. buffer: Vec, - // Description of the current region. + // Current position: current region and offset in the region of the current chunk. + current_position: Option<(RegionDescription, usize)>, + + // Current region returned by the next call, which needs to be fetched. region: Option, } -impl FragmentedMemory for WindowsProcessMemory { - fn reset(&mut self) { - self.region = None; - } +impl WindowsProcessMemory { + fn next_position(&self, params: &MemoryParams) -> Option<(RegionDescription, usize)> { + let next_addr = match self.current_position { + Some((desc, mut offset)) => { + if let Some(chunk_size) = params.memory_chunk_size { + offset = offset.saturating_add(chunk_size); + if offset < desc.length { + // Region has a next chunk, so simply select it. + return Some((desc, offset)); + } + } - fn next(&mut self, _params: &MemoryParams) -> Option { - let mut next_addr = match self.region { - Some(region) => Some(region.start.checked_add(region.length)?), - None => None, - }; - self.region = loop { - let mut info = MaybeUninit::uninit(); - // Safety: - // - the handle is a valid process handle and has the PROCESS_QUERY_INFORMATION - // permission. - let res = unsafe { - VirtualQueryEx( - handle_to_windows_handle(self.handle.as_handle()), - next_addr.map(|v| v as *const c_void), - info.as_mut_ptr(), - std::mem::size_of::(), - ) - }; - - if res == 0 { - break None; + desc.start.checked_add(desc.length)? } + None => 0, + }; - // Safety: returned value is not zero, so the function succeeded, and has filled - // the info object. - let info = unsafe { info.assume_init() }; + query_next_region(self.handle.as_handle(), next_addr).map(|desc| (desc, 0)) + } +} - next_addr = match (info.BaseAddress as usize).checked_add(info.RegionSize) { - Some(v) => Some(v), - None => { - // If this happens, a region actually covers up to u64::MAX, so there cannot - // be any region past it. That's unlikely, but lets just be safe about it. - break None; - } - }; - if info.State == MEM_COMMIT && info.Protect != PAGE_NOACCESS { - break Some(RegionDescription { - start: info.BaseAddress as usize, - length: info.RegionSize, - }); - } +fn query_next_region(handle: BorrowedHandle, mut next_addr: usize) -> Option { + loop { + let mut info = MaybeUninit::uninit(); + // Safety: + // - the handle is a valid process handle and has the PROCESS_QUERY_INFORMATION + // permission. + let res = unsafe { + VirtualQueryEx( + handle_to_windows_handle(handle.as_handle()), + Some(next_addr as *const c_void), + info.as_mut_ptr(), + std::mem::size_of::(), + ) }; + + if res == 0 { + return None; + } + + // Safety: returned value is not zero, so the function succeeded, and has filled + // the info object. + let info = unsafe { info.assume_init() }; + + // If this checked_add fails, a region actually covers up to u64::MAX, so there cannot + // be any region past it. That's unlikely, but lets just be safe about it. + next_addr = (info.BaseAddress as usize).checked_add(info.RegionSize)?; + if info.State == MEM_COMMIT && info.Protect != PAGE_NOACCESS { + return Some(RegionDescription { + start: info.BaseAddress as usize, + length: info.RegionSize, + }); + } + } +} + +impl FragmentedMemory for WindowsProcessMemory { + fn reset(&mut self) { + self.region = None; + } + + fn next(&mut self, params: &MemoryParams) -> Option { + self.current_position = self.next_position(params); + + self.region = self + .current_position + .map(|(desc, offset)| match params.memory_chunk_size { + Some(chunk_size) => RegionDescription { + start: desc.start.saturating_add(offset), + length: std::cmp::min(chunk_size, desc.length), + }, + None => desc, + }); self.region } diff --git a/boreal/tests/it/process.rs b/boreal/tests/it/process.rs index 21870621..ed86f9bb 100644 --- a/boreal/tests/it/process.rs +++ b/boreal/tests/it/process.rs @@ -177,6 +177,65 @@ rule a { assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]); } +#[test] +#[cfg(any(target_os = "linux", windows))] +fn test_process_memory_chunk_size() { + use boreal::scanner::ScanParams; + + use crate::utils::get_boreal_full_matches; + + let checker = Checker::new_without_yara( + r#" +rule a { + strings: + $a = "T5aI0uhg7S" + condition: + $a +}"#, + ); + let mut scanner = checker.scanner().scanner; + let tenmb = 10 * 1024 * 1024; + scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(tenmb))); + + let helper = BinHelper::run("memory_chunk_size"); + assert_eq!(helper.output.len(), 3); + dbg!(&helper.output); + let region1 = usize::from_str_radix(&helper.output[0], 16).unwrap(); + let region2 = usize::from_str_radix(&helper.output[1], 16).unwrap(); + let region3 = usize::from_str_radix(&helper.output[2], 16).unwrap(); + + let res = scanner.scan_process(helper.pid()).unwrap(); + let res = get_boreal_full_matches(&res); + let tenmb = 10 * 1024 * 1024; + let mut expected = vec![ + (b"T5aI0uhg7S".as_slice(), region1 + (tenmb - 10), 10), + ( + b"T5aI0uhg7S".as_slice(), + region3 + tenmb + 5 * 1024 * 1024 - 5, + 10, + ), + (b"T5aI0uhg7S".as_slice(), region3 + 2 * tenmb + 4096, 10), + ]; + // Sort by address, since the provided regions might not be in the same order as creation. + expected.sort_by_key(|v| v.1); + assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]); + + scanner.set_scan_params(ScanParams::default().memory_chunk_size(Some(15 * 1024 * 1024))); + let res = scanner.scan_process(helper.pid()).unwrap(); + let res = get_boreal_full_matches(&res); + let mut expected = vec![ + (b"T5aI0uhg7S".as_slice(), region1 + (tenmb - 10), 10), + // We now see the one in region2 + (b"T5aI0uhg7S".as_slice(), region2 + tenmb - 5, 10), + // But no longer see the first one in region3 + (b"T5aI0uhg7S".as_slice(), region3 + 2 * tenmb + 4096, 10), + ]; + // Sort by address, since the provided regions might not be in the same order as creation. + expected.sort_by_key(|v| v.1); + + assert_eq!(res, vec![("default:a".to_owned(), vec![("a", expected)])]); +} + struct BinHelper { proc: std::process::Child, output: Vec,