Skip to content

Commit

Permalink
Add support for demangling symbols
Browse files Browse the repository at this point in the history
This change adds support for transparent demangling of symbols.
Demangling is generally runtime configurable (via the previously added
Builder flag), but because it involves additional dependencies there is
also the option to disable it at compile time. If disabled at compile
time, the runtime option becomes a no-op.
Demangling currently happens in a single location inside the
Symbolizer type. This has the advantage of concentrating the feature's
implementation. However, it means that we need to bubble up the inferred
language from all resolvers -- something that the recently added IntSym
type helps us with.
Note that this change only provides *de*mangling support. That is, it
hooks into the APIs mapping addresses to human readable symbols. The
inverse, mapping human readable and potentially *un*mangled symbols to
their mangled counterparts and then to addresses is not currently being
done (that would be a useful addition to the inspect module, but is out
of the scope of this change).

Closes: libbpf#50

Signed-off-by: Daniel Müller <deso@posteo.net>
  • Loading branch information
d-e-s-o committed Jul 26, 2023
1 parent a838b76 commit bb35252
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Unreleased
----------
- Added support for automatic demangling of symbols, controlled by the
`demangle` feature (at compile time) and the corresponding flag in
`symbolize::Builder` (at runtime)
- Renamed `symbolize::SymbolizedResult` to `Sym` and made it
non-exhaustive
- Renamed `Sym::symbol` to `name`
Expand Down
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ name = "blazesym"
crate-type = ["cdylib", "rlib", "staticlib"]

[features]
default = ["dwarf", "lru"]
default = ["demangle", "dwarf", "lru"]
# Enable this feature to enable DWARF support.
dwarf = ["gimli"]
# Enable this feature to get transparent symbol demangling.
demangle = ["cpp_demangle", "rustc-demangle"]
# Enable this feature to re-generate the library's C header file. An
# up-to-date version of this header should already be available in the
# include/ directory, so this feature is only necessary when APIs are
Expand Down Expand Up @@ -71,9 +73,11 @@ lto = true
codegen-units = 1

[dependencies]
libc = "0.2.137"
cpp_demangle = {version = "0.4", optional = true}
gimli = {version = "0.27.2", optional = true}
libc = "0.2.137"
lru = {version = "0.10", optional = true}
rustc-demangle = {version = "0.1", optional = true}
tracing = {version = "0.1", default-features = false, features = ["attributes"], optional = true}

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Here is rough roadmap of currently planned features (in no particular order):
- [ ] Support inlined function lookup (https://github.com/libbpf/blazesym/issues/192)
- [x] Support symbolization of addresses in APKs (relevant for Android) (https://github.com/libbpf/blazesym/pull/222 & https://github.com/libbpf/blazesym/pull/227)
- [ ] Support ELF32 binaries (https://github.com/libbpf/blazesym/issues/53)
- [ ] Support demangling of Rust & C++ symbol names (https://github.com/libbpf/blazesym/issues/50)
- [x] Support demangling of Rust & C++ symbol names (https://github.com/libbpf/blazesym/issues/50)
- [ ] Support remote symbolization (https://github.com/libbpf/blazesym/issues/61)
- [x] Add APIs for address normalization (https://github.com/libbpf/blazesym/pull/114, https://github.com/libbpf/blazesym/pull/128, ...)
- [ ] Support advanced symbolization use cases involving [`debuginfod`](https://sourceware.org/elfutils/Debuginfod.html) (https://github.com/libbpf/blazesym/issues/203)
Expand Down
58 changes: 46 additions & 12 deletions src/symbolize/symbolizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use crate::ErrorExt as _;
use crate::IntSym;
use crate::Pid;
use crate::Result;
use crate::SrcLang;
use crate::SymResolver;

use super::source::Elf;
Expand All @@ -37,6 +38,36 @@ use super::source::Process;
use super::source::Source;


/// Attempt to demangle `name` according to the mangling scheme implied by
/// `language`.
///
/// Rust symbols are handed to `rustc_demangle` and C++ symbols to
/// `cpp_demangle`. When the language is unknown, Rust demangling is
/// attempted first and C++ demangling serves as the fallback. If no
/// scheme succeeds, `name` is returned unchanged.
#[cfg(feature = "demangle")]
fn maybe_demangle(name: &str, language: SrcLang) -> String {
    // Rust symbols are rendered using the alternate (`{:#}`) format.
    let rust = || {
        rustc_demangle::try_demangle(name)
            .ok()
            .map(|demangled| format!("{demangled:#}"))
    };
    let cpp = || {
        cpp_demangle::Symbol::new(name)
            .ok()
            .and_then(|symbol| symbol.demangle(&Default::default()).ok())
    };

    let demangled = match language {
        SrcLang::Rust => rust(),
        SrcLang::Cpp => cpp(),
        SrcLang::Unknown => rust().or_else(cpp),
    };
    // Fall back to the raw name if demangling was not possible.
    demangled.unwrap_or_else(|| name.to_string())
}

/// Fallback used when the `demangle` feature is disabled at compile time:
/// the symbol name is returned unchanged, irrespective of the language.
#[cfg(not(feature = "demangle"))]
fn maybe_demangle(name: &str, _language: SrcLang) -> String {
    // No demangling support compiled in; hand back an owned copy.
    String::from(name)
}


/// The result of symbolization by [`Symbolizer`].
#[derive(Clone, Debug)]
pub struct Sym {
Expand Down Expand Up @@ -121,6 +152,7 @@ impl Builder {
ksym_cache,
elf_cache,
src_location,
demangle,
}
}
}
Expand All @@ -142,6 +174,7 @@ pub struct Symbolizer {
ksym_cache: KSymCache,
elf_cache: ElfCache,
src_location: bool,
demangle: bool,
}

impl Symbolizer {
Expand All @@ -156,6 +189,15 @@ impl Symbolizer {
Builder::default()
}

/// Return the demangled form of `symbol` when demangling is enabled on
/// this symbolizer; otherwise hand back the name as-is.
fn maybe_demangle(&self, symbol: &str, language: SrcLang) -> String {
    // Runtime opt-out: skip the demangling attempt entirely.
    if !self.demangle {
        return symbol.to_string();
    }
    maybe_demangle(symbol, language)
}

/// Symbolize an address using the provided [`SymResolver`].
#[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(addr = format_args!("0x{addr:x}"), resolver = ?resolver)))]
fn symbolize_with_resolver(&self, addr: Addr, resolver: &dyn SymResolver) -> Result<Vec<Sym>> {
Expand All @@ -182,27 +224,19 @@ impl Symbolizer {
let mut results = vec![];
for sym in syms {
if let Some(ref linfo) = linfo {
let IntSym {
name,
addr,
lang: _lang,
} = sym;
let IntSym { name, addr, lang } = sym;
results.push(Sym {
name: String::from(name),
name: self.maybe_demangle(name, lang),
addr,
path: linfo.path.clone(),
line: linfo.line,
column: linfo.column,
_non_exhaustive: (),
});
} else {
let IntSym {
name,
addr,
lang: _lang,
} = sym;
let IntSym { name, addr, lang } = sym;
results.push(Sym {
name: String::from(name),
name: self.maybe_demangle(name, lang),
addr,
path: PathBuf::new(),
line: 0,
Expand Down
54 changes: 53 additions & 1 deletion tests/blazesym.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(clippy::let_and_return, clippy::let_unit_value)]

use std::env::current_exe;
use std::ffi::CString;
use std::fs::read as read_file;
use std::io::Error;
Expand Down Expand Up @@ -188,6 +189,50 @@ fn symbolize_dwarf_complex() {
assert_eq!(result.line, 534);
}

/// Check symbolization of a normalized address inside an ELF file, both
/// with auto-demangling disabled and with it enabled.
#[test]
fn symbolize_elf_demangle() {
    let test_elf = current_exe().unwrap();
    // Normalize the address of `Normalizer::new` within our own process.
    let raw_addr = Normalizer::new as Addr;
    let normalizer = Normalizer::new();
    let norm_addrs = normalizer
        .normalize_user_addrs_sorted(&[raw_addr], Pid::Slf)
        .unwrap();
    let (addr, _meta_idx) = norm_addrs.addrs[0];

    let src = symbolize::Source::Elf(symbolize::Elf::new(test_elf));
    // Symbolize `addr` with the given symbolizer and make sure that
    // exactly one symbol is reported.
    let symbolize_one = |symbolizer: &Symbolizer| {
        let syms = symbolizer
            .symbolize(&src, &[addr])
            .unwrap()
            .into_iter()
            .flatten()
            .collect::<Vec<_>>();
        assert_eq!(syms.len(), 1);
        syms
    };

    // First pass: demangling explicitly disabled, so the raw (mangled)
    // name is expected.
    let mangled = symbolize_one(&Symbolizer::builder().enable_demangling(false).build());
    let sym = &mangled[0];
    assert!(sym.name.contains("Normalizer3new"), "{sym:x?}");

    // Second pass: default symbolizer, this time expecting a demangled
    // name in one of two accepted spellings.
    let demangled = symbolize_one(&Symbolizer::new());
    let sym = &demangled[0];
    let accepted = [
        "blazesym::normalize::normalizer::Normalizer::new",
        "<blazesym::normalize::normalizer::Normalizer>::new",
    ];
    assert!(accepted.contains(&sym.name.as_str()), "{}", sym.name);
}

/// Check that we can symbolize addresses inside our own process.
#[test]
fn symbolize_process() {
Expand All @@ -206,7 +251,14 @@ fn symbolize_process() {
assert!(result.name.contains("symbolize_process"), "{result:x?}");

let result = &results[1];
assert!(result.name.contains("Symbolizer3new"), "{result:x?}");
// It's not entirely clear why we have seen two different demangled
// symbols, but they both seem legit.
assert!(
result.name == "blazesym::symbolize::symbolizer::Symbolizer::new"
|| result.name == "<blazesym::symbolize::symbolizer::Symbolizer>::new",
"{}",
result.name
);
}

/// Check that we can normalize addresses in an ELF shared object.
Expand Down

0 comments on commit bb35252

Please sign in to comment.