Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly handle dyld caches on macOS 13 and above #642

Merged
merged 4 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 53 additions & 16 deletions crates/examples/src/bin/dyldcachedump.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use object::read::macho::DyldCache;
use object::macho::DyldCacheHeader;
use object::read::macho::{DyldCache, DyldSubCacheSlice};
use object::Endianness;
use std::{env, fs, process};

Expand All @@ -22,21 +23,34 @@ fn main() {
continue;
}
};
let subcache_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
println!("Failed to map file '{}': {}", file_path, err,);
continue;
}
};

let subcaches_info = match get_subcache_info(&file) {
Ok(subcaches_info) => subcaches_info,
Err(err) => {
println!(
"Failed to parse Dyld shared cache file '{}': {}",
file_path, err,
);
continue;
}
};
let subcache_files = subcaches_info
.map(|info| open_subcaches(&file_path, info))
.unwrap_or_default();
let subcache_files: Option<Vec<_>> = subcache_files
.into_iter()
.map(
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
Ok(mmap) => Some(mmap),
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err);
println!("Failed to map file '{}': {}", file_path, err);
None
}
},
Expand Down Expand Up @@ -69,28 +83,51 @@ fn main() {
}
}

/// Reads the subcache entry table referenced by the main cache's header.
///
/// `main_cache_data` is the raw contents of the main dyld shared cache file.
/// Any failure while parsing the header, its magic, or the subcache table is
/// passed through to the caller. The `Option` is whatever
/// `DyldCacheHeader::subcaches` reports for this header.
fn get_subcache_info(
    main_cache_data: &[u8],
) -> object::read::Result<Option<DyldSubCacheSlice<'_, Endianness>>> {
    let cache_header = DyldCacheHeader::<Endianness>::parse(main_cache_data)?;
    // The magic determines the endianness needed to decode the header fields.
    cache_header
        .parse_magic()
        .and_then(|(_, endianness)| cache_header.subcaches(endianness, main_cache_data))
}

// If the file is a dyld shared cache, and we're on macOS 12 or later,
// then there will be one or more "subcache" files next to this file,
// with the names filename.1, filename.2, ..., filename.symbols
// or filename.01, filename.02 on macOS 13
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
// or filename.01, filename.02, ..., filename.symbols on macOS 13
fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice<Endianness>) -> Vec<fs::File> {
let subcache_suffixes: Vec<String> = match subcaches_info {
DyldSubCacheSlice::V1(subcaches) => {
// macOS 12: Subcaches have the file suffixes .1, .2, .3 etc.
(1..subcaches.len() + 1).map(|i| format!(".{i}")).collect()
}
DyldSubCacheSlice::V2(subcaches) => {
// macOS 13+: The subcache file suffix is written down in the header of the main cache.
subcaches
.iter()
.map(|s| {
// The suffix is a nul-terminated string in a fixed-size byte array.
let suffix = s.file_suffix;
let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len());
String::from_utf8_lossy(&suffix[..len]).to_string()
})
.collect()
}
_ => panic!(
"If this case is hit, it means that someone added a variant to the (non-exhaustive) \
DyldSubCacheSlice enum and forgot to update this example"
),
};
let mut files = Vec::new();
for i in 1.. {
let subcache_path = format!("{}.{}", path, i);
for suffix in subcache_suffixes {
let subcache_path = format!("{path}{suffix}");
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
}
if files.is_empty() {
for i in 1.. {
let subcache_path = format!("{}.{:02}", path, i);
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
}
}
let symbols_subcache_path = format!("{}.symbols", path);
if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) {
files.push(subcache_file);
Expand Down
54 changes: 45 additions & 9 deletions crates/examples/src/bin/objdump.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use object::{macho::DyldCacheHeader, read::macho::DyldSubCacheSlice, Endianness};
use object_examples::objdump;
use std::{env, fs, io, process};

Expand All @@ -18,14 +19,17 @@ fn main() {
process::exit(1);
}
};
let extra_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
};
let subcaches_info = get_subcache_info_if_dyld_cache(&file).ok().flatten();
let extra_files = subcaches_info
.map(|info| open_subcaches(&file_path, info))
.unwrap_or_default();
let extra_files: Vec<_> = extra_files
.into_iter()
.map(
Expand All @@ -52,17 +56,48 @@ fn main() {
.unwrap();
}

/// Reads the subcache entry table from the header of a dyld shared cache.
///
/// `main_cache_data` is the raw contents of the candidate file. If the data
/// cannot be parsed as a dyld cache header (or its magic / subcache table is
/// invalid), the parse error is returned; callers that merely want to probe
/// the file can discard it.
fn get_subcache_info_if_dyld_cache(
    main_cache_data: &[u8],
) -> object::read::Result<Option<DyldSubCacheSlice<'_, Endianness>>> {
    let cache_header = DyldCacheHeader::<Endianness>::parse(main_cache_data)?;
    // Only the endianness from the magic is needed here; the architecture is ignored.
    let (_, endianness) = cache_header.parse_magic()?;
    cache_header.subcaches(endianness, main_cache_data)
}

// If the file is a dyld shared cache, and we're on macOS 12 or later,
// then there will be one or more "subcache" files next to this file,
// with the names filename.1, filename.2 etc.
// Read those files now, if they exist, even if we don't know that
// we're dealing with a dyld shared cache. By the time we know what
// we're dealing with, it's too late to read more files.
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
// with the names filename.1, filename.2, ..., filename.symbols
// or filename.01, filename.02, ..., filename.symbols on macOS 13
fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice<Endianness>) -> Vec<fs::File> {
let subcache_suffixes: Vec<String> = match subcaches_info {
DyldSubCacheSlice::V1(subcaches) => {
// macOS 12: Subcaches have the file suffixes .1, .2, .3 etc.
(1..subcaches.len() + 1).map(|i| format!(".{i}")).collect()
}
DyldSubCacheSlice::V2(subcaches) => {
// macOS 13+: The subcache file suffix is written down in the header of the main cache.
subcaches
.iter()
.map(|s| {
// The suffix is a nul-terminated string in a fixed-size byte array.
let suffix = s.file_suffix;
let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len());
String::from_utf8_lossy(&suffix[..len]).to_string()
})
.collect()
}
_ => panic!(
"If this case is hit, it means that someone added a variant to the (non-exhaustive) \
DyldSubCacheSlice enum and forgot to update this example"
),
};
let mut files = Vec::new();
for i in 1.. {
let subcache_path = format!("{}.{}", path, i);
match fs::File::open(&subcache_path) {
for suffix in subcache_suffixes {
let subcache_path = format!("{path}{suffix}");
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
Expand All @@ -71,5 +106,6 @@ fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) {
files.push(subcache_file);
};
println!("Found {} subcache files", files.len());
files
}
2 changes: 1 addition & 1 deletion crates/examples/src/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn print<W: Write, E: Write>(
let path = match image.path() {
Ok(path) => path,
Err(err) => {
writeln!(e, "Failed to parse dydld image name: {}", err)?;
writeln!(e, "Failed to parse dyld image name: {}", err)?;
continue;
}
};
Expand Down
27 changes: 21 additions & 6 deletions src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,15 +379,29 @@ pub struct DyldCacheImageInfo<E: Endian> {
pub pad: U32<E>,
}

/// Corresponds to a struct whose source code has not been published as of Nov 2021.
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
/// Added in dyld-940, which shipped with macOS 12 / iOS 15.
/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1`
/// in dyld-1042.1.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheInfo<E: Endian> {
pub struct DyldSubCacheEntryV1<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The size of this subcache plus all previous subcaches.
pub cumulative_size: U64<E>,
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
}

/// Describes one subcache of a dyld shared cache, including its file name suffix.
///
/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16.
/// Called `dyld_subcache_entry` as of dyld-1042.1.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheEntryV2<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
/// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development".
///
/// This is a nul-terminated string in a fixed-size byte array; readers should
/// take the bytes up to the first nul (or all 32 bytes if there is none).
pub file_suffix: [u8; 32],
}

// Definitions from "/usr/include/mach-o/loader.h".
Expand Down Expand Up @@ -3253,7 +3267,8 @@ unsafe_impl_endian_pod!(
DyldCacheHeader,
DyldCacheMappingInfo,
DyldCacheImageInfo,
DyldSubCacheInfo,
DyldSubCacheEntryV1,
DyldSubCacheEntryV2,
MachHeader32,
MachHeader64,
LoadCommand,
Expand Down
75 changes: 57 additions & 18 deletions src/read/macho/dyld_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,25 @@ where
mappings: &'data [macho::DyldCacheMappingInfo<E>],
}

// This is the offset of the images_across_all_subcaches_count field.
const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4;
/// A slice of structs describing each subcache. The struct gained
/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16),
/// so this is an enum of the two possible slice types.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum DyldSubCacheSlice<'data, E: Endian> {
mstange marked this conversation as resolved.
Show resolved Hide resolved
/// V1, used between dyld-940 and dyld-1042.1.
V1(&'data [macho::DyldSubCacheEntryV1<E>]),
/// V2, used since dyld-1042.1.
V2(&'data [macho::DyldSubCacheEntryV2<E>]),
}

// Minimum header size (taken from the header's `mapping_offset` field) at which
// the header contains subcache information using the V1 entry layout.
// This is the offset of the end of the images_across_all_subcaches_count field.
const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8;

// Minimum header size (taken from the header's `mapping_offset` field) at which
// the subcache entries use the V2 layout, i.e. include the file suffix.
// This is the offset of the end of the cacheSubType field.
// This field comes right after the images_across_all_subcaches_count field,
// and we don't currently have it in our definition of the DyldCacheHeader type.
const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0;

impl<'data, E, R> DyldCache<'data, E, R>
where
Expand All @@ -51,9 +68,13 @@ where
let mappings = header.mappings(endian, data)?;

let symbols_subcache_uuid = header.symbols_subcache_uuid(endian);
let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]);

if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize {
let subcaches_info = header.subcaches(endian, data)?;
let subcaches_count = match subcaches_info {
Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(),
Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(),
None => 0,
};
if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize {
return Err(Error("Incorrect number of SubCaches"));
}

Expand All @@ -66,15 +87,22 @@ where
(None, subcache_data)
};

// Read the regular SubCaches (.1, .2, ...), if present.
// Read the regular SubCaches, if present.
let mut subcaches = Vec::new();
for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if sc_header.uuid != info.uuid {
return Err(Error("Unexpected SubCache UUID"));
if let Some(subcaches_info) = subcaches_info {
let (v1, v2) = match subcaches_info {
DyldSubCacheSlice::V1(s) => (s, &[][..]),
DyldSubCacheSlice::V2(s) => (&[][..], s),
};
let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid));
for (&data, uuid) in subcache_data.iter().zip(uuids) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if &sc_header.uuid != uuid {
return Err(Error("Unexpected SubCache UUID"));
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}

// Read the .symbols SubCache, if present.
Expand Down Expand Up @@ -255,27 +283,38 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
}

/// Return the information about subcaches, if present.
///
/// Returns `None` for dyld caches produced before dyld-940 (macOS 12).
pub fn subcaches<'data, R: ReadRef<'data>>(
&self,
endian: E,
data: R,
) -> Result<Option<&'data [macho::DyldSubCacheInfo<E>]>> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
) -> Result<Option<DyldSubCacheSlice<'data, E>>> {
philipc marked this conversation as resolved.
Show resolved Hide resolved
let header_size = self.mapping_offset.get(endian);
if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheEntryV2<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(DyldSubCacheSlice::V2(subcaches)))
} else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheInfo<E>>(
.read_slice_at::<macho::DyldSubCacheEntryV1<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(subcaches))
Ok(Some(DyldSubCacheSlice::V1(subcaches)))
} else {
Ok(None)
}
}

/// Return the UUID for the .symbols subcache, if present.
pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let uuid = self.symbols_subcache_uuid;
if uuid != [0; 16] {
return Some(uuid);
Expand All @@ -290,7 +329,7 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
endian: E,
data: R,
) -> Result<&'data [macho::DyldCacheImageInfo<E>]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
data.read_slice_at::<macho::DyldCacheImageInfo<E>>(
self.images_across_all_subcaches_offset.get(endian).into(),
self.images_across_all_subcaches_count.get(endian) as usize,
Expand Down
Loading