Skip to content

Commit

Permalink
Correctly handle dyld caches on macOS 13 and above
Browse files Browse the repository at this point in the history
This allows successful parsing of dyld caches on
macOS 13 and above on Intel Macs.

The main dyld cache file on macOS contains an array of
subcache info structs, one per subcache, each of which
specifies the UUID (and some other information) of its subcache.
`DyldCache::parse` checks that the subcache UUIDs match
these expected UUIDs.

In macOS 13, the format of the subcache info struct
changed: it gained an additional field (the file name
suffix) after the UUID and VM offset fields, which made
each entry larger. This means that as soon as you had more
than one subcache, our UUID check would fail, because the
second subcache UUID would be read from the wrong offset.

I didn't notice this on my Apple Silicon Mac, because
the arm64e dyld cache only has one subcache:
`dyld_shared_cache_arm64e.01`.
But on Intel Macs, there are currently four subcaches:
`dyld_shared_cache_x86_64.01`, `.02`, `.03`, and `.04`.

In practice this means that my software hasn't been able to
symbolicate macOS system libraries on Intel Macs since
the release of macOS 13.

This commit adds the new struct definition and makes
the UUID check work correctly.

This is a breaking change to the public API. I added
a `DyldSubCacheSlice` enum, but I'm not particularly
fond of it.
I'm also not a big fan of the new allocation for the
Vec of UUIDs, but it seemed better than the alternatives
I tried, which all had a bunch of code duplication.
  • Loading branch information
mstange committed Mar 9, 2024
1 parent 6677d67 commit 1a1fab7
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 24 deletions.
27 changes: 21 additions & 6 deletions src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,15 +379,29 @@ pub struct DyldCacheImageInfo<E: Endian> {
pub pad: U32<E>,
}

/// Corresponds to a struct whose source code has not been published as of Nov 2021.
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
/// Added in dyld-940, which shipped with macOS 12 / iOS 15.
/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1`
/// in dyld-1042.1.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheInfo<E: Endian> {
pub struct DyldSubCacheEntryV1<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The size of this subcache plus all previous subcaches.
pub cumulative_size: U64<E>,
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
}

/// One entry of the subcache array in a dyld shared cache header (second layout).
///
/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16.
/// Called `dyld_subcache_entry` as of dyld-1042.1.
///
/// Compared to [`DyldSubCacheEntryV1`], this layout carries an additional
/// trailing `file_suffix` field, so entries of the two versions have
/// different sizes and cannot be read interchangeably.
///
/// The struct is `#[repr(C)]` and is read as a slice directly from the cache
/// data, so the field order and field sizes define the expected on-disk layout.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheEntryV2<E: Endian> {
/// The UUID of this subcache.
///
/// `DyldCache::parse` compares this against the UUID in the header of the
/// corresponding subcache file.
pub uuid: [u8; 16],
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
/// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development".
///
/// Stored in a fixed 32-byte buffer.
/// NOTE(review): presumably NUL-padded when shorter than 32 bytes — confirm
/// against the dyld headers before relying on it.
pub file_suffix: [u8; 32],
}

// Definitions from "/usr/include/mach-o/loader.h".
Expand Down Expand Up @@ -3253,7 +3267,8 @@ unsafe_impl_endian_pod!(
DyldCacheHeader,
DyldCacheMappingInfo,
DyldCacheImageInfo,
DyldSubCacheInfo,
DyldSubCacheEntryV1,
DyldSubCacheEntryV2,
MachHeader32,
MachHeader64,
LoadCommand,
Expand Down
73 changes: 55 additions & 18 deletions src/read/macho/dyld_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,24 @@ where
mappings: &'data [macho::DyldCacheMappingInfo<E>],
}

// This is the offset of the images_across_all_subcaches_count field.
const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4;
/// A slice of structs describing each subcache. The struct gained
/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16),
/// so this is an enum of the two possible slice types.
///
/// This is what `DyldCacheHeader::subcaches` returns when the header contains
/// subcache information. The variant is chosen from the header size: headers
/// at least `MIN_HEADER_SIZE_SUBCACHES_V2` bytes long yield `V2`, shorter
/// headers that are at least `MIN_HEADER_SIZE_SUBCACHES_V1` bytes long yield `V1`.
///
/// Both entry types expose a `uuid` field, but because the element types
/// differ, callers have to match on the variant to iterate over the entries.
#[derive(Debug, Clone, Copy)]
pub enum DyldSubCacheSlice<'data, E: Endian> {
/// V1, used between dyld-940 and dyld-1042.1.
V1(&'data [macho::DyldSubCacheEntryV1<E>]),
/// V2, used since dyld-1042.1.
V2(&'data [macho::DyldSubCacheEntryV2<E>]),
}

// Minimum header size at which the header contains the subcache-related
// fields (subcache array, .symbols subcache UUID, images-across-all-subcaches).
// The header size is taken from the header's `mapping_offset` field and
// compared against this value.
// This is the offset of the end of the images_across_all_subcaches_count field.
const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8;

// Minimum header size at which the subcache array is read as
// `DyldSubCacheEntryV2` entries instead of `DyldSubCacheEntryV1` entries.
// This is the offset of the end of the cacheSubType field.
// This field comes right after the images_across_all_subcaches_count field,
// and we don't currently have it in our definition of the DyldCacheHeader type.
// NOTE(review): 0x1d0 is 8 bytes past the V1 threshold (0x1c8); confirm against
// the dyld headers whether cacheSubType is 8 bytes wide or is a 4-byte field
// followed by alignment padding, since that decides the exact threshold.
const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0;

impl<'data, E, R> DyldCache<'data, E, R>
where
Expand All @@ -51,9 +67,13 @@ where
let mappings = header.mappings(endian, data)?;

let symbols_subcache_uuid = header.symbols_subcache_uuid(endian);
let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]);

if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize {
let subcaches_info = header.subcaches(endian, data)?;
let subcaches_count = match subcaches_info {
Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(),
Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(),
None => 0,
};
if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize {
return Err(Error("Incorrect number of SubCaches"));
}

Expand All @@ -66,15 +86,21 @@ where
(None, subcache_data)
};

// Read the regular SubCaches (.1, .2, ...), if present.
// Read the regular SubCaches, if present.
let mut subcaches = Vec::new();
for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if sc_header.uuid != info.uuid {
return Err(Error("Unexpected SubCache UUID"));
if let Some(subcaches_info) = subcaches_info {
let uuids: Vec<&[u8; 16]> = match subcaches_info {
DyldSubCacheSlice::V1(s) => s.iter().map(|e| &e.uuid).collect(),
DyldSubCacheSlice::V2(s) => s.iter().map(|e| &e.uuid).collect(),
};
for (&data, uuid) in subcache_data.iter().zip(uuids) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if &sc_header.uuid != uuid {
return Err(Error("Unexpected SubCache UUID"));
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}

// Read the .symbols SubCache, if present.
Expand Down Expand Up @@ -255,27 +281,38 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
}

/// Return the information about subcaches, if present.
///
/// Returns `None` for dyld caches produced before dyld-940 (macOS 12).
pub fn subcaches<'data, R: ReadRef<'data>>(
&self,
endian: E,
data: R,
) -> Result<Option<&'data [macho::DyldSubCacheInfo<E>]>> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
) -> Result<Option<DyldSubCacheSlice<'data, E>>> {
let header_size = self.mapping_offset.get(endian);
if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheEntryV2<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(DyldSubCacheSlice::V2(subcaches)))
} else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheInfo<E>>(
.read_slice_at::<macho::DyldSubCacheEntryV1<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(subcaches))
Ok(Some(DyldSubCacheSlice::V1(subcaches)))
} else {
Ok(None)
}
}

/// Return the UUID for the .symbols subcache, if present.
pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let uuid = self.symbols_subcache_uuid;
if uuid != [0; 16] {
return Some(uuid);
Expand All @@ -290,7 +327,7 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
endian: E,
data: R,
) -> Result<&'data [macho::DyldCacheImageInfo<E>]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
data.read_slice_at::<macho::DyldCacheImageInfo<E>>(
self.images_across_all_subcaches_offset.get(endian).into(),
self.images_across_all_subcaches_count.get(endian) as usize,
Expand Down

0 comments on commit 1a1fab7

Please sign in to comment.