diff --git a/Cargo.lock b/Cargo.lock
index 0ffd53827..b74e3a1ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -438,6 +438,26 @@ dependencies = [
  "anyhow",
 ]
 
+[[package]]
+name = "capstone"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b08ca438d9585a2b216b0c2e88ea51e096286c5f197f7be2526bb515ef775b6c"
+dependencies = [
+ "capstone-sys",
+ "libc",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe7183271711ffb7c63a6480e4baf480e0140da59eeba9b18fcc8bf3478950e3"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "cargo-platform"
 version = "0.1.2"
@@ -3161,6 +3181,12 @@ dependencies = [
  "spin 0.5.2",
 ]
 
+[[package]]
+name = "leb128"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
+
 [[package]]
 name = "lib-dice"
 version = "0.1.0"
@@ -4104,6 +4130,12 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
 [[package]]
 name = "rustc_version"
 version = "0.2.3"
@@ -6392,6 +6424,7 @@ dependencies = [
  "build-kconfig",
  "byteorder",
  "bzip2-rs",
+ "capstone",
  "cargo_metadata",
  "clap 3.2.23",
  "colored",
@@ -6404,6 +6437,7 @@ dependencies = [
  "hex",
  "hubtools",
  "indexmap 1.9.1",
+ "leb128",
  "lpc55-rom-data",
  "lpc55_sign",
  "memchr",
@@ -6413,6 +6447,7 @@ dependencies = [
  "rangemap",
  "regex",
  "ron",
+ "rustc-demangle",
  "scroll",
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index 1035a8f21..b6a7a3294 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,6 +38,7 @@ bitflags = { version = "2.5.0", default-features = false }
 bstringify = { version = "0.1.2", default-features = false }
 byteorder = { version = "1.3.4", default-features = false }
 bzip2-rs = { version = "0.1.2", default-features = false }
+capstone = { version = "0.12.0", default-features = false, features = ["full"] }
 cargo_metadata = { version = "0.12.0", default-features = false }
 cfg-if = { version = "1", default-features = false }
 chrono = { version = "0.4", default-features = false }
@@ -70,6 +71,7 @@ hubpack = { version = "0.1.2", default-features = false }
 indexmap = { version = "1.4.0", default-features = false, features = ["serde-1"] }
 indoc = { version = "2.0.3", default-features = false }
 itertools = { version = "0.10.5", default-features = false }
+leb128 = { version = "0.2.5", default-features = false }
 lpc55-pac = { version = "0.4", default-features = false }
 memchr = { version = "2.4", default-features = false }
 memoffset = { version = "0.6.5", default-features = false }
@@ -90,6 +92,7 @@ rand_core = { version = "0.6", default-features = false }
 rangemap = { version = "1.3", default-features = false }
 regex = { version = "1", default-features = false, features = ["std", "perf", "unicode-perl"] }
 ron = { version = "0.8", default-features = false }
+rustc-demangle = { version = "0.1.24", default-features = false }
 scroll = { version = "0.10", default-features = false }
 serde = { version = "1.0.114", default-features = false, features = ["derive"] }
 serde-big-array = { version = "0.4", default-features = false }
diff --git a/app/demo-stm32g0-nucleo/app-g070.toml b/app/demo-stm32g0-nucleo/app-g070.toml
index 6aca28762..5779e6f76 100644
--- a/app/demo-stm32g0-nucleo/app-g070.toml
+++ b/app/demo-stm32g0-nucleo/app-g070.toml
@@ -65,8 +65,8 @@ stacksize = 256
 name = "task-ping"
 features = ["uart", "no-ipc-counters"]
 priority = 4
-max-sizes = {flash = 8192, ram = 512}
-stacksize = 256
+max-sizes = {flash = 8192, ram = 1024}
+stacksize = 512
 start = true
 task-slots = [{peer = "pong"}, "usart_driver"]
 
diff --git a/app/gimletlet/app.toml b/app/gimletlet/app.toml
index 262dd80b1..9a7baef24 100644
--- a/app/gimletlet/app.toml
+++ b/app/gimletlet/app.toml
@@ -55,7 +55,7 @@ tasks = ["control_plane_agent", "caboose_reader"]
 name = "task-control-plane-agent"
 priority = 7
 max-sizes = {flash = 131072, ram = 32768}
-stacksize = 4096
+stacksize = 6000
 start = true
 uses = ["usart1"]
 task-slots = [
diff --git a/build/xtask/Cargo.toml b/build/xtask/Cargo.toml
index af7de888b..21f88bf41 100644
--- a/build/xtask/Cargo.toml
+++ b/build/xtask/Cargo.toml
@@ -17,6 +17,7 @@ toml_edit = { workspace = true }
 # for dist
 byteorder = { workspace = true }
 bzip2-rs = { workspace = true }
+capstone = { workspace = true }
 ctrlc = { workspace = true }
 dunce = { workspace = true }
 filetime = { workspace = true }
@@ -25,6 +26,7 @@ goblin = { workspace = true }
 hex = "0.4"
 hubtools = { workspace = true }
 indexmap = { workspace = true }
+leb128 = { workspace = true }
 multimap = { workspace = true }
 path-slash = { workspace = true }
 rangemap = { workspace = true }
@@ -34,6 +36,7 @@ scroll = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 sha3 = { workspace = true }
+rustc-demangle = { workspace = true }
 tlvc = { workspace = true }
 tlvc-text = { workspace = true }
 toml = { workspace = true }
diff --git a/build/xtask/src/config.rs b/build/xtask/src/config.rs
index d728448b6..8366c84fe 100644
--- a/build/xtask/src/config.rs
+++ b/build/xtask/src/config.rs
@@ -702,6 +702,7 @@ impl BuildConfig<'_> {
         // nightly features that we use:
         nightly_features.extend([
             "asm_const",
+            "emit_stack_sizes",
             "naked_functions",
             "used_with_arg",
         ]);
diff --git a/build/xtask/src/dist.rs b/build/xtask/src/dist.rs
index d6b7d4a87..1eb711790 100644
--- a/build/xtask/src/dist.rs
+++ b/build/xtask/src/dist.rs
@@ -474,12 +474,23 @@ pub fn package(
             })
             .collect::<Result<_, _>>()?;
 
-        // Resolve task slots in our linked files
+        // Check stack sizes and resolve task slots in our linked files
+        let mut possible_stack_overflow = vec![];
         for task_name in cfg.toml.tasks.keys() {
             if tasks_to_build.contains(task_name.as_str()) {
+                if task_can_overflow(&cfg.toml, task_name, verbose)? {
+                    possible_stack_overflow.push(task_name);
+                }
+
                 resolve_task_slots(&cfg, task_name, image_name)?;
             }
         }
+        if !possible_stack_overflow.is_empty() {
+            bail!(
+                "tasks may overflow: {possible_stack_overflow:?}; \
+                 see logs above"
+            );
+        }
 
         // Add an empty output section for the caboose
         //
@@ -1069,6 +1080,301 @@ fn build_task(cfg: &PackageConfig, name: &str) -> Result<()> {
         .context(format!("failed to build {}", name))
 }
 
+/// Checks whether the given task can overflow its stack, printing the deepest
+/// call stack if it can (or if `verbose` is set)
+///
+/// False negatives are possible if the deepest possible stack uses dynamic
+/// dispatch or function pointers; false positives are technically possible but
+/// unlikely if there's a logically unreachable section of the call graph.
+fn task_can_overflow(
+    toml: &Config,
+    task_name: &str,
+    verbose: bool,
+) -> Result<bool> {
+    let max_stack = get_max_stack(toml, task_name, verbose)?;
+    let max_depth: u64 = max_stack.iter().map(|(d, _)| *d).sum();
+
+    let task_stack_size = toml.tasks[task_name]
+        .stacksize
+        .unwrap_or_else(|| toml.stacksize.unwrap());
+    // A stack that would be filled exactly is also reported as an overflow
+    let can_overflow = max_depth >= task_stack_size as u64;
+    if verbose || can_overflow {
+        let extra = if can_overflow {
+            format!(
+                " exceeds task stack size: {max_depth} >= {task_stack_size}"
+            )
+        } else {
+            format!(
+                ": {max_depth} bytes \
+                (< task stack size of {task_stack_size} bytes)"
+            )
+        };
+        println!("deepest stack for {task_name}{extra}");
+        for (frame_size, name) in max_stack {
+            let s = format!("[+{frame_size}]");
+            println!("  {s:>7} {name}");
+        }
+    }
+    Ok(can_overflow)
+}
+
+/// Estimates the deepest call stack for the given task
+///
+/// Returns `(frame size, function name)` pairs from `_start` to the deepest
+/// callee.  Indirect and recursive calls are skipped and functions missing
+/// from `.stack_sizes` count as 0 bytes, so this can underestimate; it can
+/// overestimate if a call path is logically impossible (`A -> B -> C`, but
+/// `B` never calls `C` when called by `A`).
+pub fn get_max_stack(
+    toml: &Config,
+    task_name: &str,
+    verbose: bool,
+) -> Result<Vec<(u64, String)>> {
+    // Open the statically-linked ELF file
+    let f = Path::new("target")
+        .join(&toml.name)
+        .join("dist")
+        .join(format!("{task_name}.tmp"));
+    let data = std::fs::read(f).context("could not open ELF file")?;
+    let elf = goblin::elf::Elf::parse(&data)?;
+
+    // Read the .stack_sizes section, which is an array of
+    // `(address: u32, stack size: unsigned leb128)` tuples
+    let sizes = crate::elf::get_section_by_name(&elf, ".stack_sizes")
+        .context("could not get .stack_sizes")?;
+    let mut sizes = &data[sizes.sh_offset as usize..][..sizes.sh_size as usize];
+    let mut addr_to_frame_size = BTreeMap::new();
+    while !sizes.is_empty() {
+        let Some(addr) = sizes.get(..4) else {
+            bail!("truncated .stack_sizes entry in {task_name}");
+        };
+        let addr = u32::from_le_bytes(addr.try_into().unwrap());
+        sizes = &sizes[4..];
+        let size = leb128::read::unsigned(&mut sizes)?;
+        addr_to_frame_size.insert(addr, size);
+    }
+
+    // There are `$t` and `$d` symbols which indicate the beginning of text
+    // versus data in the `.text` region.  We collect them into a `BTreeMap`
+    // here so that we can avoid trying to decode inline data words.
+    let mut text_regions = BTreeMap::new();
+    for sym in elf.syms.iter() {
+        if sym.st_name == 0
+            || sym.st_size != 0
+            || sym.st_type() != goblin::elf::sym::STT_NOTYPE
+        {
+            continue;
+        }
+
+        let addr = sym.st_value as u32;
+        let is_text = match elf.strtab.get_at(sym.st_name) {
+            Some("$t") => true,
+            Some("$d") => false,
+            Some(_) => continue,
+            None => {
+                bail!("bad symbol in {task_name}: {}", sym.st_name);
+            }
+        };
+        text_regions.insert(addr, is_text);
+    }
+    let is_code = |addr| {
+        let mut iter = text_regions.range(..=addr);
+        *iter.next_back().unwrap().1
+    };
+
+    // We'll be packing everything into this data structure
+    #[derive(Debug)]
+    struct FunctionData {
+        name: String,
+        short_name: String,
+        frame_size: Option<u64>,
+        calls: BTreeSet<u32>,
+    }
+
+    let text = crate::elf::get_section_by_name(&elf, ".text")
+        .context("could not get .text")?;
+
+    use capstone::{
+        arch::{arm, ArchOperand, BuildsCapstone, BuildsCapstoneExtraMode},
+        Capstone, InsnGroupId, InsnGroupType,
+    };
+    let cs = Capstone::new()
+        .arm()
+        .mode(arm::ArchMode::Thumb)
+        .extra_mode(std::iter::once(arm::ArchExtraMode::MClass))
+        .detail(true)
+        .build()
+        .map_err(|e| anyhow!("failed to initialize disassembler: {e:?}"))?;
+
+    // Disassemble each function, building a map of its call sites
+    let mut fns = BTreeMap::new();
+    for sym in elf.syms.iter() {
+        // We only care about named function symbols here
+        if sym.st_name == 0 || !sym.is_function() || sym.st_size == 0 {
+            continue;
+        }
+
+        let Some(name) = elf.strtab.get_at(sym.st_name) else {
+            bail!("bad symbol in {task_name}: {}", sym.st_name);
+        };
+
+        // Clear the lowest bit, which indicates that the function contains
+        // thumb instructions (always true for our systems!)
+        let val = sym.st_value & !1;
+        let base_addr = val as u32;
+
+        // Get the text region for this function
+        let offset = (val - text.sh_addr + text.sh_offset) as usize;
+        let text = &data[offset..][..sym.st_size as usize];
+
+        // Split the text region into instruction-only chunks
+        let mut chunks = vec![];
+        let mut chunk = None;
+        for (i, b) in text.iter().enumerate() {
+            let addr = base_addr + i as u32;
+            if is_code(addr) {
+                chunk.get_or_insert((addr, vec![])).1.push(*b);
+            } else {
+                chunks.extend(chunk.take());
+            }
+        }
+        chunks.extend(chunk); // don't forget the trailing chunk!
+
+        let mut calls = BTreeSet::new();
+        for (addr, chunk) in chunks {
+            let instrs = cs
+                .disasm_all(&chunk, addr.into())
+                .map_err(|e| anyhow!("disassembly failed: {e:?}"))?;
+            for instr in instrs.iter() {
+                let detail = cs.insn_detail(instr).map_err(|e| {
+                    anyhow!("could not get instruction details: {e}")
+                })?;
+                if detail.groups().iter().any(|g| {
+                    g == &InsnGroupId(InsnGroupType::CS_GRP_CALL as u8)
+                }) {
+                    let arch = detail.arch_detail();
+                    let ops = arch.operands();
+                    let op = ops.last().expect("missing operand!");
+
+                    let ArchOperand::ArmOperand(op) = op else {
+                        panic!("bad operand type: {op:?}");
+                    };
+                    // We can't resolve indirect calls, alas
+                    let arm::ArmOperandType::Imm(target) = op.op_type else {
+                        continue;
+                    };
+                    let target = u32::try_from(target).unwrap();
+
+                    // Avoid recursive calls into the same function (or midway
+                    // into the function, which is a thing we've seen before!
+                    // it's weird!)
+                    if !(base_addr..base_addr + sym.st_size as u32)
+                        .contains(&target)
+                    {
+                        calls.insert(target);
+                    }
+                }
+            }
+        }
+
+        let name = rustc_demangle::demangle(name).to_string();
+
+        // Strip the trailing hash from the name for ease of printing
+        let short_name = match name.rfind("::") {
+            Some(i) => name[..i].to_owned(),
+            None => name.clone(),
+        };
+
+        fns.insert(
+            base_addr,
+            FunctionData {
+                name,
+                short_name,
+                frame_size: addr_to_frame_size.get(&base_addr).copied(),
+                calls,
+            },
+        );
+    }
+
+    /// Depth-first walk of the call graph that records the deepest stack seen
+    fn recurse(
+        call_stack: &mut Vec<u32>,
+        recurse_depth: usize,
+        mut stack_depth: u64,
+        fns: &BTreeMap<u32, FunctionData>,
+        deepest: &mut Option<(u64, Vec<u32>)>,
+        verbose: bool,
+    ) {
+        let addr = *call_stack.last().unwrap();
+        let Some(f) = fns.get(&addr) else {
+            panic!("found jump to unknown function at {call_stack:08x?}");
+        };
+        let frame_size = f.frame_size.unwrap_or(0);
+        stack_depth += frame_size;
+        if verbose {
+            let indent = recurse_depth * 2;
+            println!(
+                "  {:indent$}{addr:08x}: {} [+{frame_size} => {stack_depth}]",
+                "",
+                f.short_name,
+                indent = indent
+            );
+        }
+
+        if deepest
+            .as_ref()
+            .map(|(max_depth, _)| stack_depth > *max_depth)
+            .unwrap_or(true)
+        {
+            *deepest = Some((stack_depth, call_stack.to_owned()));
+        }
+        for j in &f.calls {
+            if call_stack.contains(j) {
+                // Skip recursive / mutually recursive calls, because we can't
+                // reason about them.
+                continue;
+            } else {
+                call_stack.push(*j);
+                recurse(
+                    call_stack,
+                    recurse_depth + 1,
+                    stack_depth,
+                    fns,
+                    deepest,
+                    verbose,
+                );
+                call_stack.pop();
+            }
+        }
+    }
+
+    // Find stack sizes by traversing the graph
+    if verbose {
+        println!("finding stack sizes for {task_name}");
+    }
+    let start_addr = fns
+        .iter()
+        .find(|(_addr, v)| v.name.as_str() == "_start")
+        .map(|(addr, _v)| *addr)
+        .ok_or_else(|| anyhow!("could not find _start"))?;
+    let mut deepest = None;
+    recurse(&mut vec![start_addr], 0, 0, &fns, &mut deepest, verbose);
+
+    // Check against our configured task stack size
+    let Some((_max_depth, max_stack)) = deepest else {
+        unreachable!("must have at least one call stack");
+    };
+
+    let mut out = vec![];
+    for m in max_stack {
+        let f = fns.get(&m).unwrap();
+        let name = &f.short_name;
+        out.push((f.frame_size.unwrap_or(0), name.clone()));
+    }
+    Ok(out)
+}
+
 /// Link a specific task
 fn link_task(
     cfg: &PackageConfig,
@@ -1661,6 +1967,7 @@ fn build(
             "-C link-arg=-z -C link-arg=common-page-size=0x20 \
              -C link-arg=-z -C link-arg=max-page-size=0x20 \
              -C llvm-args=--enable-machine-outliner=never \
+             -Z emit-stack-sizes \
              -C overflow-checks=y \
              -C metadata={} \
              {}
diff --git a/build/xtask/src/sizes.rs b/build/xtask/src/sizes.rs
index 151c73810..bb7cd7ab6 100644
--- a/build/xtask/src/sizes.rs
+++ b/build/xtask/src/sizes.rs
@@ -15,7 +15,9 @@ use indexmap::map::Entry;
 use indexmap::IndexMap;
 
 use crate::{
-    dist::{Allocations, ContiguousRanges, DEFAULT_KERNEL_STACK},
+    dist::{
+        get_max_stack, Allocations, ContiguousRanges, DEFAULT_KERNEL_STACK,
+    },
     Config,
 };
 
@@ -67,6 +69,8 @@ pub fn run(
         print_memory_map(&toml, &map, verbose)?;
         print!("\n\n");
         print_task_table(&toml, &map)?;
+        print!("\n\n");
+        print_task_stacks(&toml)?;
     }
 
     // Because tasks are autosized, the only place where we can improve
@@ -423,6 +427,25 @@ fn print_memory_map(
     Ok(())
 }
 
+/// Prints the estimated deepest call stack of each task alongside its limit
+fn print_task_stacks(toml: &Config) -> Result<()> {
+    for (i, (task_name, task)) in toml.tasks.iter().enumerate() {
+        let task_stack_size =
+            task.stacksize.unwrap_or_else(|| toml.stacksize.unwrap());
+        let max_stack = get_max_stack(toml, task_name, false)?;
+        let total: u64 = max_stack.iter().map(|(n, _)| *n).sum();
+        println!("{task_name}: {total} bytes (limit is {task_stack_size})");
+        for (frame_size, name) in max_stack {
+            let s = format!("[+{frame_size}]");
+            println!("  {s:>7} {name}");
+        }
+        if i + 1 < toml.tasks.len() {
+            println!();
+        }
+    }
+    Ok(())
+}
+
 /// Loads the size of the given task (or kernel)
 pub fn load_task_size<'a>(
     toml: &'a Config,