From abf4e652fbe810711e630bf4614953634980cd7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 1 Oct 2024 11:40:49 -0700 Subject: [PATCH] Allow for early termination of symbol iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inspect::Inspector::for_each() functionality unconditionally iterates over all symbols (except when encountering an error, of course). That can be a bit limiting in case a user is only interested in, say, the first symbol matching some constraint. While not fundamentally different, iteration over all remaining symbols can be a performance issue for sufficiently large symbol sources. With this change we adjust the signature of the iteration/callback function to return a std::ops::ControlFlow variant indicating whether to continue iteration or stop. Signed-off-by: Daniel Müller --- CHANGELOG.md | 2 ++ cli/src/main.rs | 2 ++ examples/inspect-mangled.rs | 14 +++++++++----- src/breakpad/resolver.rs | 9 +++++++-- src/dwarf/resolver.rs | 5 ++++- src/elf/parser.rs | 6 +++++- src/inspect/inspector.rs | 6 +++++- src/inspect/mod.rs | 3 ++- src/ksym.rs | 10 ++++++++-- tests/blazesym.rs | 37 +++++++++++++++++++++++++++++++++++++ 10 files changed, 81 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2a2f04b..a18721ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ Unreleased ---------- - Adjusted normalization logic to use "symbolic path" for reading build IDs when normalizing with `NormalizeOpts::map_files` equal to `false` +- Adjusted `inspect::Inspector::for_each` to accept callback returning + `std::ops::ControlFlow` to facilitate early termination 0.2.0-rc.1 diff --git a/cli/src/main.rs b/cli/src/main.rs index c21f4495..e1a2a31f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,6 +3,7 @@ mod args; use std::cmp::max; +use std::ops::ControlFlow; use anyhow::Context; use anyhow::Result; @@ -113,6 +114,7 @@ fn inspect(inspect: args::inspect::Inspect) -> Result<()> { let mut sym_infos = Vec::new(); let () = inspector.for_each(&src, |sym| { let () = sym_infos.push(sym.to_owned()); + ControlFlow::Continue(()) })?; let () = sym_infos.sort_by_key(|sym| sym.addr); let () = print_sym_infos(&sym_infos); diff --git a/examples/inspect-mangled.rs b/examples/inspect-mangled.rs index 76ebeb36..d1554a3c 100644 --- a/examples/inspect-mangled.rs +++ b/examples/inspect-mangled.rs @@ -1,6 +1,7 @@ //! An example illustrating how to look up a (mangled) symbol (and its //! meta data) based on its expected demangled name. +use std::ops::ControlFlow; use std::path::Path; use anyhow::Result; @@ -16,13 +17,16 @@ use rustc_demangle::demangle; const TEST_FN: &str = "test::test_function"; -fn find_test_function(sym: &SymInfo<'_>, test_fn: &mut Option>) { +fn find_test_function( + sym: &SymInfo<'_>, + test_fn: &mut Option>, +) -> ControlFlow<()> { let name = format!("{:#}", demangle(&sym.name)); if name == TEST_FN { - if let Some(prev) = test_fn.replace(sym.to_owned()) { - // There should exist only a single symbol by that name. - panic!("already found one `{TEST_FN}` symbol: {prev:?}") - } + let _none = test_fn.replace(sym.to_owned()); + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) } } diff --git a/src/breakpad/resolver.rs b/src/breakpad/resolver.rs index 1e4adefd..930a16ac 100644 --- a/src/breakpad/resolver.rs +++ b/src/breakpad/resolver.rs @@ -5,6 +5,7 @@ use std::fmt::Formatter; use std::fmt::Result as FmtResult; use std::fs::File; use std::mem::swap; +use std::ops::ControlFlow; use std::path::Path; use std::path::PathBuf; @@ -231,7 +232,9 @@ impl Inspect for BreakpadResolver { for func in &self.symbol_file.functions { let sym = SymInfo::from(func); - let () = f(&sym); + if let ControlFlow::Break(()) = f(&sym) { + return Ok(()) + } } Ok(()) } @@ -282,7 +285,9 @@ mod tests { let err = resolver.find_addr("a_variable", &opts).unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unsupported); - let err = resolver.for_each(&opts, &mut |_| ()).unwrap_err(); + let err = resolver + .for_each(&opts, &mut |_| ControlFlow::Continue(())) + .unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unsupported); } } diff --git a/src/dwarf/resolver.rs b/src/dwarf/resolver.rs index 6deb6710..204e2695 100644 --- a/src/dwarf/resolver.rs +++ b/src/dwarf/resolver.rs @@ -449,6 +449,7 @@ mod tests { use std::env::current_exe; use std::ffi::OsStr; + use std::ops::ControlFlow; use std::path::PathBuf; use test_log::test; @@ -552,7 +553,9 @@ mod tests { let err = resolver.find_addr("factorial", &opts).unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unsupported); - let err = resolver.for_each(&opts, &mut |_| ()).unwrap_err(); + let err = resolver + .for_each(&opts, &mut |_| ControlFlow::Continue(())) + .unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unsupported); } } diff --git a/src/elf/parser.rs b/src/elf/parser.rs index a6973812..daf252d6 100644 --- a/src/elf/parser.rs +++ b/src/elf/parser.rs @@ -4,6 +4,7 @@ use std::fmt::Formatter; use std::fmt::Result as FmtResult; use std::fs::File; use std::mem; +use std::ops::ControlFlow; use std::ops::Deref as _; use std::path::Path; use std::path::PathBuf; @@ -815,7 +816,9 @@ impl ElfParser { .flatten(), obj_file_name: None, }; - let () = f(&sym_info); + if let ControlFlow::Break(()) = f(&sym_info) { + return Ok(()) + } } } @@ -1154,6 +1157,7 @@ mod tests { .for_each(&opts, &mut |sym| { let file_offset = parser.find_file_offset(sym.addr).unwrap(); assert_eq!(file_offset, sym.file_offset); + ControlFlow::Continue(()) }) .unwrap(); } diff --git a/src/inspect/inspector.rs b/src/inspect/inspector.rs index 520d2f9b..906a3983 100644 --- a/src/inspect/inspector.rs +++ b/src/inspect/inspector.rs @@ -1,5 +1,6 @@ #[cfg(feature = "breakpad")] use std::fs::File; +use std::ops::ControlFlow; use std::ops::Deref as _; #[cfg(feature = "breakpad")] use std::path::Path; @@ -149,6 +150,9 @@ impl Inspector { /// Symbols are reported in implementation defined order that should /// not be relied on. /// + /// To stop symbol iteration early, return [`ControlFlow::Break`] from the + /// callback function. + /// /// # Notes /// - no symbol name demangling is performed currently /// - currently only function symbols (as opposed to variables) are reported @@ -163,7 +167,7 @@ impl Inspector { /// - addresses are reported as they appear in the symbol source pub fn for_each(&self, src: &Source, mut f: F) -> Result<()> where - F: FnMut(&SymInfo<'_>), + F: FnMut(&SymInfo<'_>) -> ControlFlow<()>, { fn for_each_impl(slf: &Inspector, src: &Source, f: &mut ForEachFn<'_>) -> Result<()> { let (resolver, opts) = match src { diff --git a/src/inspect/mod.rs b/src/inspect/mod.rs index 1227039c..7a241b2a 100644 --- a/src/inspect/mod.rs +++ b/src/inspect/mod.rs @@ -20,6 +20,7 @@ mod source; use std::borrow::Cow; use std::fmt::Debug; +use std::ops::ControlFlow; use std::path::Path; use crate::Addr; @@ -87,7 +88,7 @@ pub(crate) struct FindAddrOpts { /// The signature of a function for iterating over symbols. -pub(crate) type ForEachFn<'f> = dyn FnMut(&SymInfo<'_>) + 'f; +pub(crate) type ForEachFn<'f> = dyn FnMut(&SymInfo<'_>) -> ControlFlow<()> + 'f; /// The trait providing inspection functionality. diff --git a/src/ksym.rs b/src/ksym.rs index 32641c1b..807dff1d 100644 --- a/src/ksym.rs +++ b/src/ksym.rs @@ -5,6 +5,7 @@ use std::fmt::Result as FmtResult; use std::fs::File; use std::io::BufRead; use std::io::BufReader; +use std::ops::ControlFlow; use std::path::Path; use std::path::PathBuf; @@ -188,7 +189,9 @@ impl Inspect for KSymResolver { for ksym in &self.syms { let sym = SymInfo::from(ksym); - let () = f(&sym); + if let ControlFlow::Break(()) = f(&sym) { + return Ok(()) + } } Ok(()) } @@ -377,7 +380,10 @@ mod tests { let opts = FindAddrOpts::default(); let mut syms = Vec::with_capacity(resolver.syms.len()); let () = resolver - .for_each(&opts, &mut |sym| syms.push(sym.name.to_string())) + .for_each(&opts, &mut |sym| { + let () = syms.push(sym.name.to_string()); + ControlFlow::Continue(()) + }) .unwrap(); let () = syms.sort(); assert_eq!(syms, vec!["a", "b", "j", "z"]); diff --git a/tests/blazesym.rs b/tests/blazesym.rs index 603dd659..4f88100b 100644 --- a/tests/blazesym.rs +++ b/tests/blazesym.rs @@ -14,6 +14,7 @@ use std::fs::read as read_file; use std::io::Error; use std::io::Read as _; use std::io::Write as _; +use std::ops::ControlFlow; use std::ops::Deref as _; #[cfg(not(windows))] use std::os::unix::ffi::OsStringExt as _; @@ -1130,6 +1131,7 @@ fn inspect_elf_breakpad_all_symbols() { let () = inspector .for_each(src, |sym| { let _inserted = syms.insert(sym.name.to_string(), sym.to_owned()); + ControlFlow::Continue(()) }) .unwrap(); @@ -1182,6 +1184,40 @@ fn inspect_elf_breakpad_all_symbols() { } +/// Check that early stopping of symbol iteration works as expected. +#[test] +fn inspect_elf_breakpad_early_iter_stop() { + fn test(src: &inspect::Source) { + let mut i = 0; + let inspector = Inspector::new(); + let () = inspector + .for_each(src, |_sym| { + if i == 0 { + i += 1; + ControlFlow::Break(()) + } else { + panic!() + } + }) + .unwrap(); + } + + let test_elf = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs-no-dwarf.bin"); + let elf = inspect::Elf::new(test_elf); + let src = inspect::Source::Elf(elf); + test(&src); + + let path = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs.sym"); + let breakpad = inspect::Breakpad::new(path); + let src = inspect::Source::Breakpad(breakpad); + test(&src); +} + + /// Check that we can iterate over all symbols in an ELF file, without /// encountering duplicates caused by dynamic/static symbol overlap. #[test] @@ -1197,6 +1233,7 @@ fn inspect_elf_all_symbols_without_duplicates() { let () = inspector .for_each(&src, |sym| { let () = syms.push(sym.name.to_string()); + ControlFlow::Continue(()) }) .unwrap();