From 1a1fab7a62b952f092e1b694fa6faea92eb5ba2a Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Fri, 8 Mar 2024 19:09:59 -0500 Subject: [PATCH 1/4] Correctly handle dyld caches on macOS 13 and above This allows successful parsing of dyld caches on macOS 13 and above on Intel Macs. The main dyld cache file on macOS contains an array of subcache info structs, each of which specifies the UUID (and some other information) of each subcache. `DyldCache::parse` checks that the subcache UUIDs match these expected UUIDs. In macOS 13, the format of the subcache info struct changed: it gained an additional field after the UUID field. This means that as soon as you had more than one subcache, our UUID check would fail, because the second subcache UUID would be read from the wrong offset. I didn't notice this on my Apple Silicon Mac, because the arm64e dyld cache only has one subcache: `dyld_shared_cache_arm64e.01`. But on Intel Macs, there are currently four subcaches: `dyld_shared_cache_x86_64.01`, `.02`, `.03`, and `.04`. In practice this means that my software hasn't been able to symbolicate macOS system libraries on Intel Macs since the release of macOS 13. This commit adds the new struct definition and makes the UUID check work correctly. This is a breaking change to the public API. I added a `DyldSubCacheSlice` enum, but I'm not particularly fond of it. I'm also not a big fan of the new allocation for the Vec of UUIDs, but it seemed better than the alternatives I tried, which all had a bunch of code duplication. --- src/macho.rs | 27 ++++++++++--- src/read/macho/dyld_cache.rs | 73 +++++++++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/src/macho.rs b/src/macho.rs index 902b8937..d77f113a 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -379,15 +379,29 @@ pub struct DyldCacheImageInfo { pub pad: U32, } -/// Corresponds to a struct whose source code has not been published as of Nov 2021. -/// Added in the dyld cache version which shipped with macOS 12 / iOS 15. +/// Added in dyld-940, which shipped with macOS 12 / iOS 15. +/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1` +/// in dyld-1042.1. #[derive(Debug, Clone, Copy)] #[repr(C)] -pub struct DyldSubCacheInfo { +pub struct DyldSubCacheEntryV1 { /// The UUID of this subcache. pub uuid: [u8; 16], - /// The size of this subcache plus all previous subcaches. - pub cumulative_size: U64, + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, +} + +/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16. +/// Called `dyld_subcache_entry` as of dyld-1042.1. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldSubCacheEntryV2 { + /// The UUID of this subcache. + pub uuid: [u8; 16], + /// The offset of this subcache from the main cache base address. + pub cache_vm_offset: U64, + /// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development". + pub file_suffix: [u8; 32], } // Definitions from "/usr/include/mach-o/loader.h". @@ -3253,7 +3267,8 @@ unsafe_impl_endian_pod!( DyldCacheHeader, DyldCacheMappingInfo, DyldCacheImageInfo, - DyldSubCacheInfo, + DyldSubCacheEntryV1, + DyldSubCacheEntryV2, MachHeader32, MachHeader64, LoadCommand, diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index c8cc0123..00f9f940 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -31,8 +31,24 @@ where mappings: &'data [macho::DyldCacheMappingInfo], } -// This is the offset of the images_across_all_subcaches_count field. -const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; +/// A slice of structs describing each subcache. The struct gained +/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16), +/// so this is an enum of the two possible slice types. +#[derive(Debug, Clone, Copy)] +pub enum DyldSubCacheSlice<'data, E: Endian> { + /// V1, used between dyld-940 and dyld-1042.1. + V1(&'data [macho::DyldSubCacheEntryV1]), + /// V2, used since dyld-1042.1. + V2(&'data [macho::DyldSubCacheEntryV2]), +} + +// This is the offset of the end of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8; + +// This is the offset of the end of the cacheSubType field. +// This field comes right after the images_across_all_subcaches_count field, +// and we don't currently have it in our definition of the DyldCacheHeader type. +const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0; impl<'data, E, R> DyldCache<'data, E, R> where @@ -51,9 +67,13 @@ where let mappings = header.mappings(endian, data)?; let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); - let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); - - if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + let subcaches_info = header.subcaches(endian, data)?; + let subcaches_count = match subcaches_info { + Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(), + Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(), + None => 0, + }; + if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize { return Err(Error("Incorrect number of SubCaches")); } @@ -66,15 +86,21 @@ where (None, subcache_data) }; - // Read the regular SubCaches (.1, .2, ...), if present. + // Read the regular SubCaches, if present. let mut subcaches = Vec::new(); - for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { - let sc_header = macho::DyldCacheHeader::::parse(data)?; - if sc_header.uuid != info.uuid { - return Err(Error("Unexpected SubCache UUID")); + if let Some(subcaches_info) = subcaches_info { + let uuids: Vec<&[u8; 16]> = match subcaches_info { + DyldSubCacheSlice::V1(s) => s.iter().map(|e| &e.uuid).collect(), + DyldSubCacheSlice::V2(s) => s.iter().map(|e| &e.uuid).collect(), + }; + for (&data, uuid) in subcache_data.iter().zip(uuids) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if &sc_header.uuid != uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); } - let mappings = sc_header.mappings(endian, data)?; - subcaches.push(DyldSubCache { data, mappings }); } // Read the .symbols SubCache, if present. @@ -255,19 +281,30 @@ impl macho::DyldCacheHeader { } /// Return the information about subcaches, if present. + /// + /// Returns `None` for dyld caches produced before dyld-940 (macOS 12). pub fn subcaches<'data, R: ReadRef<'data>>( &self, endian: E, data: R, - ) -> Result]>> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + ) -> Result>> { + let header_size = self.mapping_offset.get(endian); + if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(DyldSubCacheSlice::V2(subcaches))) + } else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 { let subcaches = data - .read_slice_at::>( + .read_slice_at::>( self.subcaches_offset.get(endian).into(), self.subcaches_count.get(endian) as usize, ) .read_error("Invalid dyld subcaches size or alignment")?; - Ok(Some(subcaches)) + Ok(Some(DyldSubCacheSlice::V1(subcaches))) } else { Ok(None) } @@ -275,7 +312,7 @@ impl macho::DyldCacheHeader { /// Return the UUID for the .symbols subcache, if present. pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { let uuid = self.symbols_subcache_uuid; if uuid != [0; 16] { return Some(uuid); @@ -290,7 +327,7 @@ impl macho::DyldCacheHeader { endian: E, data: R, ) -> Result<&'data [macho::DyldCacheImageInfo]> { - if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 { data.read_slice_at::>( self.images_across_all_subcaches_offset.get(endian).into(), self.images_across_all_subcaches_count.get(endian) as usize, From a24f21cc9cc3964a45175b6271887ec12e134ff2 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Sat, 9 Mar 2024 20:40:12 -0500 Subject: [PATCH 2/4] Make dyldcachedump and objdump load the correct dyld subcaches. dyldcachedump was working correctly on macOS 13+ because it was trying the "leading zero" suffix format as well as the "no leading zero" suffix format. This commit changes it to read the suffix from the main cache header. objdump was not able to parse dyld shared cache files on macOS 13+ because it was only using the "no leading zero" suffix format, and thus not finding the subcaches. --- crates/examples/src/bin/dyldcachedump.rs | 65 ++++++++++++++++++------ crates/examples/src/bin/objdump.rs | 50 ++++++++++++++---- crates/examples/src/objdump.rs | 2 +- 3 files changed, 91 insertions(+), 26 deletions(-) diff --git a/crates/examples/src/bin/dyldcachedump.rs b/crates/examples/src/bin/dyldcachedump.rs index 7d328710..36745048 100644 --- a/crates/examples/src/bin/dyldcachedump.rs +++ b/crates/examples/src/bin/dyldcachedump.rs @@ -1,4 +1,5 @@ -use object::read::macho::DyldCache; +use object::macho::DyldCacheHeader; +use object::read::macho::{DyldCache, DyldSubCacheSlice}; use object::Endianness; use std::{env, fs, process}; @@ -22,7 +23,6 @@ fn main() { continue; } }; - let subcache_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -30,13 +30,27 @@ fn main() { continue; } }; + + let subcaches_info = match get_subcache_info(&file) { + Ok(subcaches_info) => subcaches_info, + Err(err) => { + println!( + "Failed to parse Dyld shared cache file '{}': {}", + file_path, err, + ); + continue; + } + }; + let subcache_files = subcaches_info + .map(|info| open_subcaches(&file_path, info)) + .unwrap_or_default(); let subcache_files: Option> = subcache_files .into_iter() .map( |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { Ok(mmap) => Some(mmap), Err(err) => { - eprintln!("Failed to map file '{}': {}", file_path, err); + println!("Failed to map file '{}': {}", file_path, err); None } }, @@ -69,28 +83,47 @@ fn main() { } } +/// Gets the slice of subcache info structs from the header of the main cache. +fn get_subcache_info( + main_cache_data: &[u8], +) -> object::read::Result>> { + let header = DyldCacheHeader::::parse(main_cache_data)?; + let (_arch, endian) = header.parse_magic()?; + let subcaches_info = header.subcaches(endian, main_cache_data)?; + Ok(subcaches_info) +} + // If the file is a dyld shared cache, and we're on macOS 12 or later, // then there will be one or more "subcache" files next to this file, // with the names filename.1, filename.2, ..., filename.symbols -// or filename.01, filename.02 on macOS 13 -fn open_subcaches_if_exist(path: &str) -> Vec { +// or filename.01, filename.02, ..., filename.symbols on macOS 13 +fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> Vec { + let subcache_suffixes: Vec = match subcaches_info { + DyldSubCacheSlice::V1(subcaches) => { + // macOS 12: Subcaches have the file suffixes .1, .2, .3 etc. + (1..subcaches.len() + 1).map(|i| format!(".{i}")).collect() + } + DyldSubCacheSlice::V2(subcaches) => { + // macOS 13+: The subcache file suffix is written down in the header of the main cache. + subcaches + .iter() + .map(|s| { + // The suffix is a nul-terminated string in a fixed-size byte array. + let suffix = s.file_suffix; + let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len()); + String::from_utf8_lossy(&suffix[..len]).to_string() + }) + .collect() + } + }; let mut files = Vec::new(); - for i in 1.. { - let subcache_path = format!("{}.{}", path, i); + for suffix in subcache_suffixes { + let subcache_path = format!("{path}{suffix}"); match fs::File::open(subcache_path) { Ok(subcache_file) => files.push(subcache_file), Err(_) => break, }; } - if files.is_empty() { - for i in 1.. { - let subcache_path = format!("{}.{:02}", path, i); - match fs::File::open(subcache_path) { - Ok(subcache_file) => files.push(subcache_file), - Err(_) => break, - }; - } - } let symbols_subcache_path = format!("{}.symbols", path); if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) { files.push(subcache_file); diff --git a/crates/examples/src/bin/objdump.rs b/crates/examples/src/bin/objdump.rs index 72e00b02..71da21f2 100644 --- a/crates/examples/src/bin/objdump.rs +++ b/crates/examples/src/bin/objdump.rs @@ -1,3 +1,4 @@ +use object::{macho::DyldCacheHeader, read::macho::DyldSubCacheSlice, Endianness}; use object_examples::objdump; use std::{env, fs, io, process}; @@ -18,7 +19,6 @@ fn main() { process::exit(1); } }; - let extra_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -26,6 +26,10 @@ fn main() { process::exit(1); } }; + let subcaches_info = get_subcache_info_if_dyld_cache(&file).ok().flatten(); + let extra_files = subcaches_info + .map(|info| open_subcaches(&file_path, info)) + .unwrap_or_default(); let extra_files: Vec<_> = extra_files .into_iter() .map( @@ -52,17 +56,44 @@ fn main() { .unwrap(); } +/// Gets the slice of subcache info structs from the header of the main cache, +/// if `main_cache_data` is the data of a Dyld shared cache. +fn get_subcache_info_if_dyld_cache( + main_cache_data: &[u8], +) -> object::read::Result>> { + let header = DyldCacheHeader::::parse(main_cache_data)?; + let (_arch, endian) = header.parse_magic()?; + let subcaches_info = header.subcaches(endian, main_cache_data)?; + Ok(subcaches_info) +} + // If the file is a dyld shared cache, and we're on macOS 12 or later, // then there will be one or more "subcache" files next to this file, -// with the names filename.1, filename.2 etc. -// Read those files now, if they exist, even if we don't know that -// we're dealing with a dyld shared cache. By the time we know what -// we're dealing with, it's too late to read more files. -fn open_subcaches_if_exist(path: &str) -> Vec { +// with the names filename.1, filename.2, ..., filename.symbols +// or filename.01, filename.02, ..., filename.symbols on macOS 13 +fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> Vec { + let subcache_suffixes: Vec = match subcaches_info { + DyldSubCacheSlice::V1(subcaches) => { + // macOS 12: Subcaches have the file suffixes .1, .2, .3 etc. + (1..subcaches.len() + 1).map(|i| format!(".{i}")).collect() + } + DyldSubCacheSlice::V2(subcaches) => { + // macOS 13+: The subcache file suffix is written down in the header of the main cache. + subcaches + .iter() + .map(|s| { + // The suffix is a nul-terminated string in a fixed-size byte array. + let suffix = s.file_suffix; + let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len()); + String::from_utf8_lossy(&suffix[..len]).to_string() + }) + .collect() + } + }; let mut files = Vec::new(); - for i in 1.. { - let subcache_path = format!("{}.{}", path, i); - match fs::File::open(&subcache_path) { + for suffix in subcache_suffixes { + let subcache_path = format!("{path}{suffix}"); + match fs::File::open(subcache_path) { Ok(subcache_file) => files.push(subcache_file), Err(_) => break, }; @@ -71,5 +102,6 @@ fn open_subcaches_if_exist(path: &str) -> Vec { if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) { files.push(subcache_file); }; + println!("Found {} subcache files", files.len()); files } diff --git a/crates/examples/src/objdump.rs b/crates/examples/src/objdump.rs index 33a5e084..31c6b16d 100644 --- a/crates/examples/src/objdump.rs +++ b/crates/examples/src/objdump.rs @@ -60,7 +60,7 @@ pub fn print( let path = match image.path() { Ok(path) => path, Err(err) => { - writeln!(e, "Failed to parse dydld image name: {}", err)?; + writeln!(e, "Failed to parse dyld image name: {}", err)?; continue; } }; From 6c15656784414b9f50a4de41761bec2ec6ebe5cf Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Sun, 10 Mar 2024 11:11:50 -0400 Subject: [PATCH 3/4] Make DyldSubCacheSlice non-exhaustive. --- crates/examples/src/bin/dyldcachedump.rs | 4 ++++ crates/examples/src/bin/objdump.rs | 4 ++++ src/read/macho/dyld_cache.rs | 1 + 3 files changed, 9 insertions(+) diff --git a/crates/examples/src/bin/dyldcachedump.rs b/crates/examples/src/bin/dyldcachedump.rs index 36745048..6d7b9424 100644 --- a/crates/examples/src/bin/dyldcachedump.rs +++ b/crates/examples/src/bin/dyldcachedump.rs @@ -115,6 +115,10 @@ fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> }) .collect() } + _ => panic!( + "If this case is hit, it means that someone added a variant to the (non-exhaustive) \ + DyldSubCacheSlice enum and forgot to update this example" + ), }; let mut files = Vec::new(); for suffix in subcache_suffixes { diff --git a/crates/examples/src/bin/objdump.rs b/crates/examples/src/bin/objdump.rs index 71da21f2..58fb6c46 100644 --- a/crates/examples/src/bin/objdump.rs +++ b/crates/examples/src/bin/objdump.rs @@ -89,6 +89,10 @@ fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice) -> }) .collect() } + _ => panic!( + "If this case is hit, it means that someone added a variant to the (non-exhaustive) \ + DyldSubCacheSlice enum and forgot to update this example" + ), }; let mut files = Vec::new(); for suffix in subcache_suffixes { diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index 00f9f940..af46cbd7 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -35,6 +35,7 @@ where /// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16), /// so this is an enum of the two possible slice types. #[derive(Debug, Clone, Copy)] +#[non_exhaustive] pub enum DyldSubCacheSlice<'data, E: Endian> { /// V1, used between dyld-940 and dyld-1042.1. V1(&'data [macho::DyldSubCacheEntryV1]), From 40ba2259bbc591d534a05fd9a34a315607ced1f7 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Sun, 10 Mar 2024 11:18:59 -0400 Subject: [PATCH 4/4] Avoid Vec allocation for subcache UUIDs. --- src/read/macho/dyld_cache.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index af46cbd7..6375a369 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -90,10 +90,11 @@ where // Read the regular SubCaches, if present. let mut subcaches = Vec::new(); if let Some(subcaches_info) = subcaches_info { - let uuids: Vec<&[u8; 16]> = match subcaches_info { - DyldSubCacheSlice::V1(s) => s.iter().map(|e| &e.uuid).collect(), - DyldSubCacheSlice::V2(s) => s.iter().map(|e| &e.uuid).collect(), + let (v1, v2) = match subcaches_info { + DyldSubCacheSlice::V1(s) => (s, &[][..]), + DyldSubCacheSlice::V2(s) => (&[][..], s), }; + let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid)); for (&data, uuid) in subcache_data.iter().zip(uuids) { let sc_header = macho::DyldCacheHeader::::parse(data)?; if &sc_header.uuid != uuid {