diff --git a/crates/examples/src/bin/dyldcachedump.rs b/crates/examples/src/bin/dyldcachedump.rs index 7d328710..6d7b9424 100644 --- a/crates/examples/src/bin/dyldcachedump.rs +++ b/crates/examples/src/bin/dyldcachedump.rs @@ -1,4 +1,5 @@ -use object::read::macho::DyldCache; +use object::macho::DyldCacheHeader; +use object::read::macho::{DyldCache, DyldSubCacheSlice}; use object::Endianness; use std::{env, fs, process}; @@ -22,7 +23,6 @@ fn main() { continue; } }; - let subcache_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -30,13 +30,27 @@ fn main() { continue; } }; + + let subcaches_info = match get_subcache_info(&file) { + Ok(subcaches_info) => subcaches_info, + Err(err) => { + println!( + "Failed to parse Dyld shared cache file '{}': {}", + file_path, err, + ); + continue; + } + }; + let subcache_files = subcaches_info + .map(|info| open_subcaches(&file_path, info)) + .unwrap_or_default(); let subcache_files: Option> = subcache_files .into_iter() .map( |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { Ok(mmap) => Some(mmap), Err(err) => { - eprintln!("Failed to map file '{}': {}", file_path, err); + println!("Failed to map file '{}': {}", file_path, err); None } }, @@ -69,28 +83,51 @@ fn main() { } } +/// Gets the slice of subcache info structs from the header of the main cache. +fn get_subcache_info( + main_cache_data: &[u8], +) -> object::read::Result>> { + let header = DyldCacheHeader::::parse(main_cache_data)?; + let (_arch, endian) = header.parse_magic()?; + let subcaches_info = header.subcaches(endian, main_cache_data)?; + Ok(subcaches_info) +} + // If the file is a dyld shared cache, and we're on macOS 12 or later, // then there will be one or more "subcache" files next to this file, // with the names filename.1, filename.2, ..., filename.symbols -// or filename.01, filename.02 on macOS 13 -fn open_subcaches_if_exist(path: &str) -> Vec { +// or filename.01, filename.02, ..., filename.symbols on macOS 13 +fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> Vec { + let subcache_suffixes: Vec = match subcaches_info { + DyldSubCacheSlice::V1(subcaches) => { + // macOS 12: Subcaches have the file suffixes .1, .2, .3 etc. + (1..subcaches.len() + 1).map(|i| format!(".{i}")).collect() + } + DyldSubCacheSlice::V2(subcaches) => { + // macOS 13+: The subcache file suffix is written down in the header of the main cache. + subcaches + .iter() + .map(|s| { + // The suffix is a nul-terminated string in a fixed-size byte array. + let suffix = s.file_suffix; + let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len()); + String::from_utf8_lossy(&suffix[..len]).to_string() + }) + .collect() + } + _ => panic!( + "If this case is hit, it means that someone added a variant to the (non-exhaustive) \ + DyldSubCacheSlice enum and forgot to update this example" + ), + }; let mut files = Vec::new(); - for i in 1.. { - let subcache_path = format!("{}.{}", path, i); + for suffix in subcache_suffixes { + let subcache_path = format!("{path}{suffix}"); match fs::File::open(subcache_path) { Ok(subcache_file) => files.push(subcache_file), Err(_) => break, }; } - if files.is_empty() { - for i in 1.. { - let subcache_path = format!("{}.{:02}", path, i); - match fs::File::open(subcache_path) { - Ok(subcache_file) => files.push(subcache_file), - Err(_) => break, - }; - } - } let symbols_subcache_path = format!("{}.symbols", path); if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) { files.push(subcache_file); diff --git a/crates/examples/src/bin/objdump.rs b/crates/examples/src/bin/objdump.rs index 72e00b02..58fb6c46 100644 --- a/crates/examples/src/bin/objdump.rs +++ b/crates/examples/src/bin/objdump.rs @@ -1,3 +1,4 @@ +use object::{macho::DyldCacheHeader, read::macho::DyldSubCacheSlice, Endianness}; use object_examples::objdump; use std::{env, fs, io, process}; @@ -18,7 +19,6 @@ fn main() { process::exit(1); } }; - let extra_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -26,6 +26,10 @@ fn main() { process::exit(1); } }; + let subcaches_info = get_subcache_info_if_dyld_cache(&file).ok().flatten(); + let extra_files = subcaches_info + .map(|info| open_subcaches(&file_path, info)) + .unwrap_or_default(); let extra_files: Vec<_> = extra_files .into_iter() .map( @@ -52,17 +56,48 @@ fn main() { .unwrap(); } +/// Gets the slice of subcache info structs from the header of the main cache, +/// if `main_cache_data` is the data of a Dyld shared cache. +fn get_subcache_info_if_dyld_cache( + main_cache_data: &[u8], +) -> object::read::Result>> { + let header = DyldCacheHeader::::parse(main_cache_data)?; + let (_arch, endian) = header.parse_magic()?; + let subcaches_info = header.subcaches(endian, main_cache_data)?; + Ok(subcaches_info) +} + // If the file is a dyld shared cache, and we're on macOS 12 or later, // then there will be one or more "subcache" files next to this file, -// with the names filename.1, filename.2 etc. -// Read those files now, if they exist, even if we don't know that -// we're dealing with a dyld shared cache. By the time we know what -// we're dealing with, it's too late to read more files. -fn open_subcaches_if_exist(path: &str) -> Vec { +// with the names filename.1, filename.2, ..., filename.symbols +// or filename.01, filename.02, ..., filename.symbols on macOS 13 +fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> Vec { + let subcache_suffixes: Vec = match subcaches_info { + DyldSubCacheSlice::V1(subcaches) => { + // macOS 12: Subcaches have the file suffixes .1, .2, .3 etc. + (1..subcaches.len() + 1).map(|i| format!(".{i}")).collect() + } + DyldSubCacheSlice::V2(subcaches) => { + // macOS 13+: The subcache file suffix is written down in the header of the main cache. + subcaches + .iter() + .map(|s| { + // The suffix is a nul-terminated string in a fixed-size byte array. + let suffix = s.file_suffix; + let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len()); + String::from_utf8_lossy(&suffix[..len]).to_string() + }) + .collect() + } + _ => panic!( + "If this case is hit, it means that someone added a variant to the (non-exhaustive) \ + DyldSubCacheSlice enum and forgot to update this example" + ), + }; let mut files = Vec::new(); - for i in 1.. { - let subcache_path = format!("{}.{}", path, i); - match fs::File::open(&subcache_path) { + for suffix in subcache_suffixes { + let subcache_path = format!("{path}{suffix}"); + match fs::File::open(subcache_path) { Ok(subcache_file) => files.push(subcache_file), Err(_) => break, }; @@ -71,5 +106,6 @@ fn open_subcaches_if_exist(path: &str) -> Vec { if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) { files.push(subcache_file); }; + println!("Found {} subcache files", files.len()); files } diff --git a/crates/examples/src/objdump.rs b/crates/examples/src/objdump.rs index 33a5e084..31c6b16d 100644 --- a/crates/examples/src/objdump.rs +++ b/crates/examples/src/objdump.rs @@ -60,7 +60,7 @@ pub fn print( let path = match image.path() { Ok(path) => path, Err(err) => { - writeln!(e, "Failed to parse dydld image name: {}", err)?; + writeln!(e, "Failed to parse dyld image name: {}", err)?; continue; } }; diff --git a/src/macho.rs b/src/macho.rs index 902b8937..d77f113a 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -379,15 +379,29 @@ pub struct DyldCacheImageInfo { pub pad: U32, } -/// Corresponds to a struct whose source code has not been published as of Nov 2021. -/// Added in the dyld cache version which shipped with macOS 12 / iOS 15. +/// Added in dyld-940, which shipped with macOS 12 / iOS 15. +/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1` +/// in dyld-1042.1. #[derive(Debug, Clone, Copy)] #[repr(C)] -pub struct DyldSubCacheInfo { +pub struct DyldSubCacheEntryV1 { /// The UUID of this subcache. pub uuid: [u8; 16], - /// The size of this subcache plus all previous subcaches. - pub cumulative_size: U64, + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, +} + +/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16. +/// Called `dyld_subcache_entry` as of dyld-1042.1. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldSubCacheEntryV2 { + /// The UUID of this subcache. + pub uuid: [u8; 16], + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, + /// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development". + pub file_suffix: [u8; 32], } // Definitions from "/usr/include/mach-o/loader.h". @@ -3253,7 +3267,8 @@ unsafe_impl_endian_pod!( DyldCacheHeader, DyldCacheMappingInfo, DyldCacheImageInfo, - DyldSubCacheInfo, + DyldSubCacheEntryV1, + DyldSubCacheEntryV2, MachHeader32, MachHeader64, LoadCommand, diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index c8cc0123..6375a369 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -31,8 +31,25 @@ where mappings: &'data [macho::DyldCacheMappingInfo], } -// This is the offset of the images_across_all_subcaches_count field. -const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; +/// A slice of structs describing each subcache. The struct gained +/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16), +/// so this is an enum of the two possible slice types. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum DyldSubCacheSlice<'data, E: Endian> { + /// V1, used between dyld-940 and dyld-1042.1. + V1(&'data [macho::DyldSubCacheEntryV1]), + /// V2, used since dyld-1042.1. + V2(&'data [macho::DyldSubCacheEntryV2]), +} + +// This is the offset of the end of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8; + +// This is the offset of the end of the cacheSubType field. +// This field comes right after the images_across_all_subcaches_count field, +// and we don't currently have it in our definition of the DyldCacheHeader type. +const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0; impl<'data, E, R> DyldCache<'data, E, R> where @@ -51,9 +68,13 @@ where let mappings = header.mappings(endian, data)?; let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); - let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); - - if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + let subcaches_info = header.subcaches(endian, data)?; + let subcaches_count = match subcaches_info { + Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(), + Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(), + None => 0, + }; + if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize { return Err(Error("Incorrect number of SubCaches")); } @@ -66,15 +87,22 @@ where (None, subcache_data) }; - // Read the regular SubCaches (.1, .2, ...), if present. + // Read the regular SubCaches, if present. let mut subcaches = Vec::new(); - for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { - let sc_header = macho::DyldCacheHeader::::parse(data)?; - if sc_header.uuid != info.uuid { - return Err(Error("Unexpected SubCache UUID")); + if let Some(subcaches_info) = subcaches_info { + let (v1, v2) = match subcaches_info { + DyldSubCacheSlice::V1(s) => (s, &[][..]), + DyldSubCacheSlice::V2(s) => (&[][..], s), + }; + let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid)); + for (&data, uuid) in subcache_data.iter().zip(uuids) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if &sc_header.uuid != uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); } - let mappings = sc_header.mappings(endian, data)?; - subcaches.push(DyldSubCache { data, mappings }); } // Read the .symbols SubCache, if present. @@ -255,19 +283,30 @@ impl macho::DyldCacheHeader { } /// Return the information about subcaches, if present. + /// + /// Returns `None` for dyld caches produced before dyld-940 (macOS 12). pub fn subcaches<'data, R: ReadRef<'data>>( &self, endian: E, data: R, - ) -> Result]>> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + ) -> Result>> { + let header_size = self.mapping_offset.get(endian); + if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(DyldSubCacheSlice::V2(subcaches))) + } else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 { let subcaches = data - .read_slice_at::>( + .read_slice_at::>( self.subcaches_offset.get(endian).into(), self.subcaches_count.get(endian) as usize, ) .read_error("Invalid dyld subcaches size or alignment")?; - Ok(Some(subcaches)) + Ok(Some(DyldSubCacheSlice::V1(subcaches))) } else { Ok(None) } @@ -275,7 +314,7 @@ impl macho::DyldCacheHeader { /// Return the UUID for the .symbols subcache, if present. pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { let uuid = self.symbols_subcache_uuid; if uuid != [0; 16] { return Some(uuid); @@ -290,7 +329,7 @@ impl macho::DyldCacheHeader { endian: E, data: R, ) -> Result<&'data [macho::DyldCacheImageInfo]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { data.read_slice_at::>( self.images_across_all_subcaches_offset.get(endian).into(), self.images_across_all_subcaches_count.get(endian) as usize,