diff --git a/CHANGELOG.md b/CHANGELOG.md index eaac3be8..8303e217 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ Unreleased ---------- +- Added support for automatic demangling of symbols, controlled by + the `demangle` feature (at compile time) and the corresponding flag in + `symbolize::Builder` (at runtime) - Renamed `symbolize::SymbolizedResult` to `Sym` and made it non-exhaustive - Renamed `Sym::symbol` to `name` diff --git a/Cargo.toml b/Cargo.toml index a4758bef..70651868 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,9 +38,11 @@ name = "blazesym" crate-type = ["cdylib", "rlib", "staticlib"] [features] -default = ["dwarf", "lru"] +default = ["demangle", "dwarf", "lru"] # Enable this feature to enable DWARF support. dwarf = ["gimli"] +# Enable this feature to get transparent symbol demangling. +demangle = ["cpp_demangle", "rustc-demangle"] # Enable this feature to re-generate the library's C header file. An # up-to-date version of this header should already be available in the # include/ directory, so this feature is only necessary when APIs are @@ -71,9 +73,11 @@ lto = true codegen-units = 1 [dependencies] -libc = "0.2.137" +cpp_demangle = {version = "0.4", optional = true} gimli = {version = "0.27.2", optional = true} +libc = "0.2.137" lru = {version = "0.10", optional = true} +rustc-demangle = {version = "0.1", optional = true} tracing = {version = "0.1", default-features = false, features = ["attributes"], optional = true} [dev-dependencies] diff --git a/README.md b/README.md index 8ed74def..af7eede4 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Here is rough roadmap of currently planned features (in no particular order): - [ ] Support inlined function lookup (https://github.com/libbpf/blazesym/issues/192) - [x] Support symbolization of addresses in APKs (relevant for Android) (https://github.com/libbpf/blazesym/pull/222 & https://github.com/libbpf/blazesym/pull/227) - [ ] Support ELF32 binaries 
(https://github.com/libbpf/blazesym/issues/53) -- [ ] Support demangling of Rust & C++ symbol names (https://github.com/libbpf/blazesym/issues/50) +- [x] Support demangling of Rust & C++ symbol names (https://github.com/libbpf/blazesym/issues/50) - [ ] Support remote symbolization (https://github.com/libbpf/blazesym/issues/61) - [x] Add APIs for address normalization (https://github.com/libbpf/blazesym/pull/114, https://github.com/libbpf/blazesym/pull/128, ...) - [ ] Support advanced symbolization use cases involving [`debuginfod`](https://sourceware.org/elfutils/Debuginfod.html) (https://github.com/libbpf/blazesym/issues/203) diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs index c46ca5b6..a2ef12c0 100644 --- a/src/symbolize/symbolizer.rs +++ b/src/symbolize/symbolizer.rs @@ -26,6 +26,7 @@ use crate::ErrorExt as _; use crate::IntSym; use crate::Pid; use crate::Result; +use crate::SrcLang; use crate::SymResolver; use super::source::Elf; @@ -37,6 +38,36 @@ use super::source::Process; use super::source::Source; +/// Demangle `name` using the scheme for `language`: Rust via `rustc_demangle`, C++ via `cpp_demangle`, and for `SrcLang::Unknown` Rust first, then C++; if demangling fails, `name` is returned unchanged. +#[cfg(feature = "demangle")] +fn maybe_demangle(name: &str, language: SrcLang) -> String { + match language { + SrcLang::Rust => rustc_demangle::try_demangle(name) + .ok() + .as_ref() + .map(|x| format!("{x:#}")), + SrcLang::Cpp => cpp_demangle::Symbol::new(name) + .ok() + .and_then(|x| x.demangle(&Default::default()).ok()), + SrcLang::Unknown => rustc_demangle::try_demangle(name) + .map(|x| format!("{x:#}")) + .ok() + .or_else(|| { + cpp_demangle::Symbol::new(name) + .ok() + .and_then(|sym| sym.demangle(&Default::default()).ok()) + }), + } + .unwrap_or_else(|| name.to_string()) +} + +#[cfg(not(feature = "demangle"))] +fn maybe_demangle(name: &str, _language: SrcLang) -> String { + // The `demangle` feature is compiled out: hand the name back unchanged. + name.to_string() +} + + /// The result of symbolization by [`Symbolizer`]. 
#[derive(Clone, Debug)] pub struct Sym { @@ -121,6 +152,7 @@ impl Builder { ksym_cache, elf_cache, src_location, + demangle, } } } @@ -142,6 +174,7 @@ pub struct Symbolizer { ksym_cache: KSymCache, elf_cache: ElfCache, src_location: bool, + demangle: bool, } impl Symbolizer { @@ -156,6 +189,15 @@ impl Symbolizer { Builder::default() } + /// Demangle `symbol` for `language` if demangling is enabled on this symbolizer; otherwise return it unchanged. + fn maybe_demangle(&self, symbol: &str, language: SrcLang) -> String { + if self.demangle { + maybe_demangle(symbol, language) + } else { + symbol.to_string() + } + } + /// Symbolize an address using the provided [`SymResolver`]. #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(addr = format_args!("0x{addr:x}"), resolver = ?resolver)))] fn symbolize_with_resolver(&self, addr: Addr, resolver: &dyn SymResolver) -> Result<Vec<Sym>> { @@ -182,13 +224,9 @@ impl Symbolizer { let mut results = vec![]; for sym in syms { if let Some(ref linfo) = linfo { - let IntSym { - name, - addr, - lang: _lang, - } = sym; + let IntSym { name, addr, lang } = sym; results.push(Sym { - name: String::from(name), + name: self.maybe_demangle(name, lang), addr, path: linfo.path.clone(), line: linfo.line, @@ -196,13 +234,9 @@ impl Symbolizer { _non_exhaustive: (), }); } else { - let IntSym { - name, - addr, - lang: _lang, - } = sym; + let IntSym { name, addr, lang } = sym; results.push(Sym { - name: String::from(name), + name: self.maybe_demangle(name, lang), addr, path: PathBuf::new(), line: 0, diff --git a/tests/blazesym.rs b/tests/blazesym.rs index 4b55882a..69f8adbd 100644 --- a/tests/blazesym.rs +++ b/tests/blazesym.rs @@ -1,5 +1,6 @@ #![allow(clippy::let_and_return, clippy::let_unit_value)] +use std::env::current_exe; use std::ffi::CString; use std::fs::read as read_file; use std::io::Error; @@ -188,6 +189,50 @@ fn symbolize_dwarf_complex() { assert_eq!(result.line, 534); } +/// Symbolize a normalized address inside an ELF file, with and without +/// auto-demangling enabled. 
+#[test] +fn symbolize_elf_demangle() { + let test_elf = current_exe().unwrap(); + let addr = Normalizer::new as Addr; + let normalizer = Normalizer::new(); + let norm_addrs = normalizer + .normalize_user_addrs_sorted(&[addr], Pid::Slf) + .unwrap(); + let (addr, _meta_idx) = norm_addrs.addrs[0]; + + let src = symbolize::Source::Elf(symbolize::Elf::new(test_elf)); + let symbolizer = Symbolizer::builder().enable_demangling(false).build(); + let results = symbolizer + .symbolize(&src, &[addr]) + .unwrap() + .into_iter() + .flatten() + .collect::<Vec<_>>(); + assert_eq!(results.len(), 1); + + let result = &results[0]; + assert!(result.name.contains("Normalizer3new"), "{result:x?}"); + + // Do it again, this time with demangling enabled; either demangled spelling is accepted. + let symbolizer = Symbolizer::new(); + let results = symbolizer + .symbolize(&src, &[addr]) + .unwrap() + .into_iter() + .flatten() + .collect::<Vec<_>>(); + assert_eq!(results.len(), 1); + + let result = &results[0]; + assert!( + result.name == "blazesym::normalize::normalizer::Normalizer::new" + || result.name == "<blazesym::normalize::normalizer::Normalizer>::new", + "{}", + result.name + ); +} + /// Check that we can symbolize addresses inside our own process. #[test] fn symbolize_process() { @@ -206,7 +251,14 @@ fn symbolize_process() { assert!(result.name.contains("symbolize_process"), "{result:x?}"); let result = &results[1]; - assert!(result.name.contains("Symbolizer3new"), "{result:x?}"); + // It's not entirely clear why we have seen two different demangled + // symbols, but they both seem legit. + assert!( + result.name == "blazesym::symbolize::symbolizer::Symbolizer::new" + || result.name == "<blazesym::symbolize::symbolizer::Symbolizer>::new", + "{}", + result.name + ); } /// Check that we can normalize addresses in an ELF shared object.