diff --git a/Cargo.lock b/Cargo.lock index 0ffd53827..b74e3a1ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -438,6 +438,26 @@ dependencies = [ "anyhow", ] +[[package]] +name = "capstone" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08ca438d9585a2b216b0c2e88ea51e096286c5f197f7be2526bb515ef775b6c" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7183271711ffb7c63a6480e4baf480e0140da59eeba9b18fcc8bf3478950e3" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "cargo-platform" version = "0.1.2" @@ -3161,6 +3181,12 @@ dependencies = [ "spin 0.5.2", ] +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + [[package]] name = "lib-dice" version = "0.1.0" @@ -4104,6 +4130,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc_version" version = "0.2.3" @@ -6392,6 +6424,7 @@ dependencies = [ "build-kconfig", "byteorder", "bzip2-rs", + "capstone", "cargo_metadata", "clap 3.2.23", "colored", @@ -6404,6 +6437,7 @@ dependencies = [ "hex", "hubtools", "indexmap 1.9.1", + "leb128", "lpc55-rom-data", "lpc55_sign", "memchr", @@ -6413,6 +6447,7 @@ dependencies = [ "rangemap", "regex", "ron", + "rustc-demangle", "scroll", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 1035a8f21..b6a7a3294 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ bitflags = { version = "2.5.0", default-features = false } bstringify = { version = "0.1.2", default-features = false } byteorder = { version = "1.3.4", default-features = false 
} bzip2-rs = { version = "0.1.2", default-features = false } +capstone = { version = "0.12.0", default-features = false, features = ["full"] } cargo_metadata = { version = "0.12.0", default-features = false } cfg-if = { version = "1", default-features = false } chrono = { version = "0.4", default-features = false } @@ -70,6 +71,7 @@ hubpack = { version = "0.1.2", default-features = false } indexmap = { version = "1.4.0", default-features = false, features = ["serde-1"] } indoc = { version = "2.0.3", default-features = false } itertools = { version = "0.10.5", default-features = false } +leb128 = { version = "0.2.5", default-features = false } lpc55-pac = { version = "0.4", default-features = false } memchr = { version = "2.4", default-features = false } memoffset = { version = "0.6.5", default-features = false } @@ -90,6 +92,7 @@ rand_core = { version = "0.6", default-features = false } rangemap = { version = "1.3", default-features = false } regex = { version = "1", default-features = false, features = ["std", "perf", "unicode-perl"] } ron = { version = "0.8", default-features = false } +rustc-demangle = { version = "0.1.24", default-features = false } scroll = { version = "0.10", default-features = false } serde = { version = "1.0.114", default-features = false, features = ["derive"] } serde-big-array = { version = "0.4", default-features = false } diff --git a/app/demo-stm32g0-nucleo/app-g070.toml b/app/demo-stm32g0-nucleo/app-g070.toml index 6aca28762..5779e6f76 100644 --- a/app/demo-stm32g0-nucleo/app-g070.toml +++ b/app/demo-stm32g0-nucleo/app-g070.toml @@ -65,8 +65,8 @@ stacksize = 256 name = "task-ping" features = ["uart", "no-ipc-counters"] priority = 4 -max-sizes = {flash = 8192, ram = 512} -stacksize = 256 +max-sizes = {flash = 8192, ram = 1024} +stacksize = 512 start = true task-slots = [{peer = "pong"}, "usart_driver"] diff --git a/app/gimletlet/app.toml b/app/gimletlet/app.toml index 262dd80b1..9a7baef24 100644 --- a/app/gimletlet/app.toml +++ 
b/app/gimletlet/app.toml @@ -55,7 +55,7 @@ tasks = ["control_plane_agent", "caboose_reader"] name = "task-control-plane-agent" priority = 7 max-sizes = {flash = 131072, ram = 32768} -stacksize = 4096 +stacksize = 6000 start = true uses = ["usart1"] task-slots = [ diff --git a/build/xtask/Cargo.toml b/build/xtask/Cargo.toml index af7de888b..21f88bf41 100644 --- a/build/xtask/Cargo.toml +++ b/build/xtask/Cargo.toml @@ -17,6 +17,7 @@ toml_edit = { workspace = true } # for dist byteorder = { workspace = true } bzip2-rs = { workspace = true } +capstone = { workspace = true } ctrlc = { workspace = true } dunce = { workspace = true } filetime = { workspace = true } @@ -25,6 +26,7 @@ goblin = { workspace = true } hex = "0.4" hubtools = { workspace = true } indexmap = { workspace = true } +leb128 = { workspace = true } multimap = { workspace = true } path-slash = { workspace = true } rangemap = { workspace = true } @@ -34,6 +36,7 @@ scroll = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } sha3 = { workspace = true } +rustc-demangle = { workspace = true } tlvc = { workspace = true } tlvc-text = { workspace = true } toml = { workspace = true } diff --git a/build/xtask/src/config.rs b/build/xtask/src/config.rs index d728448b6..8366c84fe 100644 --- a/build/xtask/src/config.rs +++ b/build/xtask/src/config.rs @@ -702,6 +702,7 @@ impl BuildConfig<'_> { // nightly features that we use: nightly_features.extend([ "asm_const", + "emit_stack_sizes", "naked_functions", "used_with_arg", ]); diff --git a/build/xtask/src/dist.rs b/build/xtask/src/dist.rs index d6b7d4a87..1eb711790 100644 --- a/build/xtask/src/dist.rs +++ b/build/xtask/src/dist.rs @@ -474,12 +474,23 @@ pub fn package( }) .collect::>()?; - // Resolve task slots in our linked files + // Check stack sizes and resolve task slots in our linked files + let mut possible_stack_overflow = vec![]; for task_name in cfg.toml.tasks.keys() { if tasks_to_build.contains(task_name.as_str()) { + if 
task_can_overflow(&cfg.toml, task_name, verbose)? { + possible_stack_overflow.push(task_name); + } + resolve_task_slots(&cfg, task_name, image_name)?; } } + if !possible_stack_overflow.is_empty() { + bail!( + "tasks may overflow: {possible_stack_overflow:?}; \ + see logs above" + ); + } // Add an empty output section for the caboose // @@ -1069,6 +1080,301 @@ fn build_task(cfg: &PackageConfig, name: &str) -> Result<()> { .context(format!("failed to build {}", name)) } +/// Checks whether the given task can overflow its stack +/// +/// False negatives are possible if the deepest possible stack uses dynamic +/// dispatch or function pointers; false positives are technically possible but +/// unlikely if there's a logically unreachable section of the call graph. +fn task_can_overflow( + toml: &Config, + task_name: &str, + verbose: bool, +) -> Result { + let max_stack = get_max_stack(toml, task_name, verbose)?; + let max_depth: u64 = max_stack.iter().map(|(d, _)| *d).sum(); + + let task_stack_size = toml.tasks[task_name] + .stacksize + .unwrap_or_else(|| toml.stacksize.unwrap()); + let can_overflow = max_depth >= task_stack_size as u64; + if verbose || can_overflow { + let extra = if can_overflow { + format!( + " exceeds task stack size: {max_depth} >= {task_stack_size}" + ) + } else { + format!( + ": {max_depth} bytes \ + (< task stack size of {task_stack_size} bytes)" + ) + }; + println!("deepest stack for {task_name}{extra}"); + for (frame_size, name) in max_stack { + let s = format!("[+{frame_size}]"); + println!(" {s:>7} {name}"); + } + Ok(can_overflow) + } else { + Ok(false) + } +} + +/// Estimates the maximum stack size for the given task +/// +/// This does not take dynamic function calls into account, which could cause +/// underestimation. Overestimation is less likely, but still may happen if +/// there are logically impossible call trees (e.g. `A -> B` and `B -> C`, but +/// `B` never calls `C` if called by `A`). 
+pub fn get_max_stack( + toml: &Config, + task_name: &str, + verbose: bool, +) -> Result> { + // Open the statically-linked ELF file + let f = Path::new("target") + .join(&toml.name) + .join("dist") + .join(format!("{task_name}.tmp")); + let data = std::fs::read(f).context("could not open ELF file")?; + let elf = goblin::elf::Elf::parse(&data)?; + + // Read the .stack_sizes section, which is an array of + // `(address: u32, stack size: unsigned leb128)` tuples + let sizes = crate::elf::get_section_by_name(&elf, ".stack_sizes") + .context("could not get .stack_sizes")?; + let mut sizes = &data[sizes.sh_offset as usize..][..sizes.sh_size as usize]; + let mut addr_to_frame_size = BTreeMap::new(); + while !sizes.is_empty() { + let (addr, rest) = sizes.split_at(4); + let addr = u32::from_le_bytes(addr.try_into().unwrap()); + sizes = rest; + let size = leb128::read::unsigned(&mut sizes)?; + addr_to_frame_size.insert(addr, size); + } + + // There are `$t` and `$d` symbols which indicate the beginning of text + // versus data in the `.text` region. We collect them into a `BTreeMap` + // here so that we can avoid trying to decode inline data words. 
+ let mut text_regions = BTreeMap::new(); + for sym in elf.syms.iter() { + if sym.st_name == 0 + || sym.st_size != 0 + || sym.st_type() != goblin::elf::sym::STT_NOTYPE + { + continue; + } + + let addr = sym.st_value as u32; + let is_text = match elf.strtab.get_at(sym.st_name) { + Some("$t") => true, + Some("$d") => false, + Some(_) => continue, + None => { + bail!("bad symbol in {task_name}: {}", sym.st_name); + } + }; + text_regions.insert(addr, is_text); + } + let is_code = |addr| { + let mut iter = text_regions.range(..=addr); + *iter.next_back().unwrap().1 + }; + + // We'll be packing everything into this data structure + #[derive(Debug)] + struct FunctionData { + name: String, + short_name: String, + frame_size: Option, + calls: BTreeSet, + } + + let text = crate::elf::get_section_by_name(&elf, ".text") + .context("could not get .text")?; + + use capstone::{ + arch::{arm, ArchOperand, BuildsCapstone, BuildsCapstoneExtraMode}, + Capstone, InsnGroupId, InsnGroupType, + }; + let cs = Capstone::new() + .arm() + .mode(arm::ArchMode::Thumb) + .extra_mode(std::iter::once(arm::ArchExtraMode::MClass)) + .detail(true) + .build() + .map_err(|e| anyhow!("failed to initialize disassembler: {e:?}"))?; + + // Disassemble each function, building a map of its call sites + let mut fns = BTreeMap::new(); + for sym in elf.syms.iter() { + // We only care about named function symbols here + if sym.st_name == 0 || !sym.is_function() || sym.st_size == 0 { + continue; + } + + let Some(name) = elf.strtab.get_at(sym.st_name) else { + bail!("bad symbol in {task_name}: {}", sym.st_name); + }; + + // Clear the lowest bit, which indicates that the function contains + // thumb instructions (always true for our systems!) 
+ let val = sym.st_value & !1; + let base_addr = val as u32; + + // Get the text region for this function + let offset = (val - text.sh_addr + text.sh_offset) as usize; + let text = &data[offset..][..sym.st_size as usize]; + + // Split the text region into instruction-only chunks + let mut chunks = vec![]; + let mut chunk = None; + for (i, b) in text.iter().enumerate() { + let addr = base_addr + i as u32; + if is_code(addr) { + chunk.get_or_insert((addr, vec![])).1.push(*b); + } else { + chunks.extend(chunk.take()); + } + } + chunks.extend(chunk); // don't forget the trailing chunk! + + let mut calls = BTreeSet::new(); + for (addr, chunk) in chunks { + let instrs = cs + .disasm_all(&chunk, addr.into()) + .map_err(|e| anyhow!("disassembly failed: {e:?}"))?; + for instr in instrs.iter() { + let detail = cs.insn_detail(instr).map_err(|e| { + anyhow!("could not get instruction details: {e}") + })?; + if detail.groups().iter().any(|g| { + g == &InsnGroupId(InsnGroupType::CS_GRP_CALL as u8) + }) { + let arch = detail.arch_detail(); + let ops = arch.operands(); + let op = ops.last().unwrap_or_else(|| { + panic!("missing operand!"); + }); + + let ArchOperand::ArmOperand(op) = op else { + panic!("bad operand type: {op:?}"); + }; + // We can't resolve indirect calls, alas + let arm::ArmOperandType::Imm(target) = op.op_type else { + continue; + }; + let target = u32::try_from(target).unwrap(); + + // Avoid recursive calls into the same function (or midway + // into the function, which is a thing we've seen before! + // it's weird!) 
+ if !(base_addr..base_addr + sym.st_size as u32) + .contains(&target) + { + calls.insert(target); + } + } + } + } + + let name = rustc_demangle::demangle(name).to_string(); + + // Strip the trailing hash from the name for ease of printing + let short_name = if let Some(i) = name.rfind("::") { + &name[..i] + } else { + &name + } + .to_owned(); + + fns.insert( + base_addr, + FunctionData { + name, + short_name, + frame_size: addr_to_frame_size.get(&base_addr).map(|i| *i), + calls, + }, + ); + } + + fn recurse( + call_stack: &mut Vec, + recurse_depth: usize, + mut stack_depth: u64, + fns: &BTreeMap, + deepest: &mut Option<(u64, Vec)>, + verbose: bool, + ) { + let addr = *call_stack.last().unwrap(); + let Some(f) = fns.get(&addr) else { + panic!("found jump to unknown function at {call_stack:08x?}"); + }; + let frame_size = f.frame_size.unwrap_or(0); + stack_depth += frame_size; + if verbose { + let indent = recurse_depth * 2; + println!( + " {:indent$}{addr:08x}: {} [+{frame_size} => {stack_depth}]", + "", + f.short_name, + indent = indent + ); + } + + if deepest + .as_ref() + .map(|(max_depth, _)| stack_depth > *max_depth) + .unwrap_or(true) + { + *deepest = Some((stack_depth, call_stack.to_owned())); + } + for j in &f.calls { + if call_stack.contains(j) { + // Skip recursive / mutually recursive calls, because we can't + // reason about them. 
+ continue; + } else { + call_stack.push(*j); + recurse( + call_stack, + recurse_depth + 1, + stack_depth, + fns, + deepest, + verbose, + ); + call_stack.pop(); + } + } + } + + // Find stack sizes by traversing the graph + if verbose { + println!("finding stack sizes for {task_name}"); + } + let start_addr = fns + .iter() + .find(|(_addr, v)| v.name.as_str() == "_start") + .map(|(addr, _v)| *addr) + .ok_or_else(|| anyhow!("could not find _start"))?; + let mut deepest = None; + recurse(&mut vec![start_addr], 0, 0, &fns, &mut deepest, verbose); + + // Unpack the deepest call stack found by the traversal + let Some((_max_depth, max_stack)) = deepest else { + unreachable!("must have at least one call stack"); + }; + + let mut out = vec![]; + for m in max_stack { + let f = fns.get(&m).unwrap(); + let name = &f.short_name; + out.push((f.frame_size.unwrap_or(0), name.clone())); + } + Ok(out) +} + /// Link a specific task fn link_task( cfg: &PackageConfig, @@ -1661,6 +1967,7 @@ fn build( "-C link-arg=-z -C link-arg=common-page-size=0x20 \ -C link-arg=-z -C link-arg=max-page-size=0x20 \ -C llvm-args=--enable-machine-outliner=never \ + -Z emit-stack-sizes \ -C overflow-checks=y \ -C metadata={} \ {} diff --git a/build/xtask/src/sizes.rs b/build/xtask/src/sizes.rs index 151c73810..bb7cd7ab6 100644 --- a/build/xtask/src/sizes.rs +++ b/build/xtask/src/sizes.rs @@ -15,7 +15,9 @@ use indexmap::map::Entry; use indexmap::IndexMap; use crate::{ - dist::{Allocations, ContiguousRanges, DEFAULT_KERNEL_STACK}, + dist::{ + get_max_stack, Allocations, ContiguousRanges, DEFAULT_KERNEL_STACK, + }, Config, }; @@ -67,6 +69,8 @@ pub fn run( print_memory_map(&toml, &map, verbose)?; print!("\n\n"); print_task_table(&toml, &map)?; + print!("\n\n"); + print_task_stacks(&toml)?; } // Because tasks are autosized, the only place where we can improve @@ -423,6 +427,25 @@ fn print_memory_map( Ok(()) } +fn print_task_stacks(toml: &Config) -> Result<()> { + for (i, (task_name, task)) in 
toml.tasks.iter().enumerate() { + let task_stack_size = + task.stacksize.unwrap_or_else(|| toml.stacksize.unwrap()); + + let max_stack = get_max_stack(&toml, task_name, false)?; + let total: u64 = max_stack.iter().map(|(n, _)| *n).sum(); + println!("{task_name}: {total} bytes (limit is {task_stack_size})"); + for (frame_size, name) in max_stack { + let s = format!("[+{frame_size}]"); + println!(" {s:>7} {name}"); + } + if i + 1 < toml.tasks.len() { + println!(); + } + } + Ok(()) +} + /// Loads the size of the given task (or kernel) pub fn load_task_size<'a>( toml: &'a Config,