From 43dd335f4d317512a08c780e1147d0a60056e930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Thu, 28 Sep 2023 16:31:29 -0700 Subject: [PATCH] Properly demangle inlined calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We failed to honor the demangle setting for the recently added inlined function calls. Make sure to demangle their names as necessary and desired. Signed-off-by: Daniel Müller --- cli/src/main.rs | 13 +++-- examples/addr2ln.rs | 13 +++-- examples/addr2ln_pid.rs | 13 +++-- examples/backtrace.rs | 26 +++++++--- src/symbolize/mod.rs | 19 ++++--- src/symbolize/symbolizer.rs | 16 +++--- tests/blazesym.rs | 100 ++++++++++++++++++++++++------------ 7 files changed, 131 insertions(+), 69 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index b9d81a6ad..d25b953c3 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -85,7 +85,7 @@ fn print_frame( addr_info: Option<(Addr, Addr, usize)>, code_info: &Option, ) { - let code_info = if let Some(code_info) = code_info { + let code_info = code_info.as_ref().map(|code_info| { let path = code_info.to_path(); let path = path.display(); @@ -94,21 +94,24 @@ fn print_frame( (Some(line), None) => format!(" {path}:{line}"), (None, _) => format!(" {path}"), } - } else { - String::new() - }; + }); if let Some((input_addr, addr, offset)) = addr_info { // If we have various address information bits we have a new symbol. println!( "{input_addr:#0width$x}: {name} @ {addr:#x}+{offset:#x}{code_info}", + code_info = code_info.as_deref().unwrap_or(""), width = ADDR_WIDTH ) } else { // Otherwise we are dealing with an inlined call. 
println!( - "{:width$} {name} @ {code_info} [inlined]", + "{:width$} {name}{code_info} [inlined]", " ", + code_info = code_info + .map(|info| format!(" @{info}")) + .as_deref() + .unwrap_or(""), width = ADDR_WIDTH ) } diff --git a/examples/addr2ln.rs b/examples/addr2ln.rs index f5381c837..d863408ae 100644 --- a/examples/addr2ln.rs +++ b/examples/addr2ln.rs @@ -16,7 +16,7 @@ const ADDR_WIDTH: usize = 16; fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &Option) { - let code_info = if let Some(code_info) = code_info { + let code_info = code_info.as_ref().map(|code_info| { let path = code_info.to_path(); let path = path.display(); @@ -25,21 +25,24 @@ fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &O (Some(line), None) => format!(" {path}:{line}"), (None, _) => format!(" {path}"), } - } else { - String::new() - }; + }); if let Some((input_addr, addr, offset)) = addr_info { // If we have various address information bits we have a new symbol. println!( "{input_addr:#0width$x}: {name} @ {addr:#x}+{offset:#x}{code_info}", + code_info = code_info.as_deref().unwrap_or(""), width = ADDR_WIDTH ) } else { // Otherwise we are dealing with an inlined call. 
println!( - "{:width$} {name} @ {code_info} [inlined]", + "{:width$} {name}{code_info} [inlined]", " ", + code_info = code_info + .map(|info| format!(" @{info}")) + .as_deref() + .unwrap_or(""), width = ADDR_WIDTH ) } diff --git a/examples/addr2ln_pid.rs b/examples/addr2ln_pid.rs index 9c56e74be..12b9ba824 100644 --- a/examples/addr2ln_pid.rs +++ b/examples/addr2ln_pid.rs @@ -16,7 +16,7 @@ const ADDR_WIDTH: usize = 16; fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &Option) { - let code_info = if let Some(code_info) = code_info { + let code_info = code_info.as_ref().map(|code_info| { let path = code_info.to_path(); let path = path.display(); @@ -25,21 +25,24 @@ fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &O (Some(line), None) => format!(" {path}:{line}"), (None, _) => format!(" {path}"), } - } else { - String::new() - }; + }); if let Some((input_addr, addr, offset)) = addr_info { // If we have various address information bits we have a new symbol. println!( "{input_addr:#0width$x}: {name} @ {addr:#x}+{offset:#x}{code_info}", + code_info = code_info.as_deref().unwrap_or(""), width = ADDR_WIDTH ) } else { // Otherwise we are dealing with an inlined call. 
println!( - "{:width$} {name} @ {code_info} [inlined]", + "{:width$} {name}{code_info} [inlined]", " ", + code_info = code_info + .map(|info| format!(" @{info}")) + .as_deref() + .unwrap_or(""), width = ADDR_WIDTH ) } diff --git a/examples/backtrace.rs b/examples/backtrace.rs index 8b472c160..53d612fae 100644 --- a/examples/backtrace.rs +++ b/examples/backtrace.rs @@ -1,3 +1,5 @@ +#![allow(clippy::let_unit_value)] + use std::cmp::min; use std::mem::size_of; use std::mem::transmute; @@ -16,7 +18,7 @@ const ADDR_WIDTH: usize = 16; fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &Option) { - let code_info = if let Some(code_info) = code_info { + let code_info = code_info.as_ref().map(|code_info| { let path = code_info.to_path(); let path = path.display(); @@ -25,21 +27,24 @@ fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &O (Some(line), None) => format!(" {path}:{line}"), (None, _) => format!(" {path}"), } - } else { - String::new() - }; + }); if let Some((input_addr, addr, offset)) = addr_info { // If we have various address information bits we have a new symbol. println!( "{input_addr:#0width$x}: {name} @ {addr:#x}+{offset:#x}{code_info}", + code_info = code_info.as_deref().unwrap_or(""), width = ADDR_WIDTH ) } else { // Otherwise we are dealing with an inlined call. 
println!( - "{:width$} {name} @ {code_info} [inlined]", + "{:width$} {name}{code_info} [inlined]", " ", + code_info = code_info + .map(|info| format!(" @{info}")) + .as_deref() + .unwrap_or(""), width = ADDR_WIDTH ) } @@ -52,8 +57,13 @@ fn symbolize_current_bt() { let mut bt_buf = [ptr::null_mut::(); MAX_CNT]; let bt_cnt = unsafe { libc::backtrace(bt_buf.as_mut_ptr(), MAX_CNT as _) } as usize; - let bt = &bt_buf[0..min(bt_cnt, MAX_CNT)]; - let bt = unsafe { transmute::<&[*mut libc::c_void], &[Addr]>(bt) }; + let bt = &mut bt_buf[0..min(bt_cnt, MAX_CNT)]; + let bt = unsafe { transmute::<&mut [*mut libc::c_void], &mut [Addr]>(bt) }; + + // For all but the top most address in the call stack, adjust for + // the fact that we captured the address we will return to, but not + // the one we called from. + let () = bt.iter_mut().skip(1).for_each(|addr| *addr -= 1); // Symbolize the addresses for the current process, as that's where // they were captured. @@ -91,7 +101,7 @@ fn f() { g() } -#[inline(never)] +#[inline(always)] fn g() { h() } diff --git a/src/symbolize/mod.rs b/src/symbolize/mod.rs index a9d0e4f43..579762fad 100644 --- a/src/symbolize/mod.rs +++ b/src/symbolize/mod.rs @@ -21,8 +21,12 @@ //! //! const ADDR_WIDTH: usize = 16; //! -//! fn print_frame(name: &str, addr_info: Option<(Addr, Addr, usize)>, code_info: &Option) { -//! let code_info = if let Some(code_info) = code_info { +//! fn print_frame( +//! name: &str, +//! addr_info: Option<(Addr, Addr, usize)>, +//! code_info: &Option, +//! ) { +//! let code_info = code_info.as_ref().map(|code_info| { //! let path = code_info.to_path(); //! let path = path.display(); //! @@ -31,21 +35,24 @@ //! (Some(line), None) => format!(" {path}:{line}"), //! (None, _) => format!(" {path}"), //! } -//! } else { -//! String::new() -//! }; +//! }); //! //! if let Some((input_addr, addr, offset)) = addr_info { //! // If we have various address information bits we have a new symbol. //! println!( //! 
"{input_addr:#0width$x}: {name} @ {addr:#x}+{offset:#x}{code_info}", +//! code_info = code_info.as_deref().unwrap_or(""), //! width = ADDR_WIDTH //! ) //! } else { //! // Otherwise we are dealing with an inlined call. //! println!( -//! "{:width$} {name} @ {code_info} [inlined]", +//! "{:width$} {name}{code_info} [inlined]", //! " ", +//! code_info = code_info +//! .map(|info| format!(" @{info}")) +//! .as_deref() +//! .unwrap_or(""), //! width = ADDR_WIDTH //! ) //! } diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs index d593300a1..fc07f5862 100644 --- a/src/symbolize/symbolizer.rs +++ b/src/symbolize/symbolizer.rs @@ -214,12 +214,19 @@ impl Symbolizer { (None, None) }; + let IntSym { + name: sym_name, + addr: sym_addr, + size: sym_size, + lang, + } = sym; + let inlined = if let Some(code_info) = &addr_code_info { code_info .inlined .iter() .map(|(name, info)| { - let name = name.to_string(); + let name = self.maybe_demangle(name, lang); let info = info.as_ref().map(CodeInfo::from); InlinedFn { name, @@ -232,13 +239,6 @@ impl Symbolizer { Vec::new() }; - let IntSym { - name: sym_name, - addr: sym_addr, - size: sym_size, - lang, - } = sym; - let sym = Sym { name: self.maybe_demangle(name.unwrap_or(sym_name), lang), addr: sym_addr, diff --git a/tests/blazesym.rs b/tests/blazesym.rs index a07068fb6..1d9bbe8df 100644 --- a/tests/blazesym.rs +++ b/tests/blazesym.rs @@ -4,6 +4,7 @@ use std::env::current_exe; use std::ffi::CString; use std::ffi::OsStr; use std::fs::read as read_file; +use std::hint::black_box; use std::io::Error; use std::os::unix::ffi::OsStringExt as _; use std::path::Path; @@ -221,52 +222,87 @@ fn symbolize_dwarf_complex() { assert_eq!(result.code_info.as_ref().unwrap().line, Some(534)); } + +#[inline(always)] +fn inlined_call() -> usize { + black_box(42) +} + + +#[inline(never)] +fn test_function() { + let x = inlined_call(); + let _x = black_box(x); +} + + /// Symbolize a normalized address inside an ELF file, with and without 
/// auto-demangling enabled. #[test] fn symbolize_elf_demangle() { - let test_elf = current_exe().unwrap(); - let addr = Normalizer::normalize_user_addrs_sorted as Addr; + fn test(addr: Addr) -> Result<(), ()> { + let test_elf = current_exe().unwrap(); + let src = symbolize::Source::Elf(symbolize::Elf::new(test_elf)); + let symbolizer = Symbolizer::builder().enable_demangling(false).build(); + let result = symbolizer + .symbolize_single(&src, addr) + .unwrap() + .into_sym() + .unwrap(); + + assert!( + result.name.contains("blazesym13test_function"), + "{result:x?}" + ); + + if cfg!(debug_assertions) { + if result.inlined.is_empty() { + return Err(()) + } + assert!(result.inlined[0].name.contains("blazesym12inlined_call")); + + // Do it again, this time with demangling enabled. + let symbolizer = Symbolizer::new(); + let result = symbolizer + .symbolize_single(&src, addr) + .unwrap() + .into_sym() + .unwrap(); + + assert_eq!(result.name, "blazesym::test_function"); + assert_eq!(result.inlined.len(), 1, "{:#?}", result.inlined); + assert_eq!(result.inlined[0].name, "blazesym::inlined_call"); + } + Ok(()) + } + + + let addrs = [test_function as Addr + 8]; let normalizer = Normalizer::builder().enable_build_ids(false).build(); let norm_addrs = normalizer - .normalize_user_addrs_sorted(&[addr], Pid::Slf) + .normalize_user_addrs_sorted(&addrs, Pid::Slf) .unwrap(); let (addr, _meta_idx) = norm_addrs.addrs[0]; + let test_elf = current_exe().unwrap(); let src = symbolize::Source::Elf(symbolize::Elf::new(test_elf)); let symbolizer = Symbolizer::builder().enable_demangling(false).build(); - let results = symbolizer - .symbolize(&src, &[addr]) + let result = symbolizer + .symbolize_single(&src, addr) .unwrap() - .into_iter() - .collect::>(); - assert_eq!(results.len(), 1); - - let result = results[0].as_sym().unwrap(); - assert!( - result - .name - .contains("Normalizer27normalize_user_addrs_sorted"), - "{result:x?}" - ); + .into_sym() + .unwrap(); - // Do it again, this time 
with demangling enabled. - let symbolizer = Symbolizer::new(); - let results = symbolizer - .symbolize(&src, &[addr]) - .unwrap() - .into_iter() - .collect::>(); - assert_eq!(results.len(), 1); + let addr = result.addr; + let size = result.size.unwrap(); + for inst_addr in addr..addr + size { + println!("{inst_addr:#x} {size}"); + if test(inst_addr).is_ok() { + return + } + } - let result = results[0].as_sym().unwrap(); - assert!( - result.name == "blazesym::normalize::normalizer::Normalizer::normalize_user_addrs_sorted" - || result.name - == "::normalize_user_addrs_sorted", - "{}", - result.name - ); + panic!("failed to find inlined function call"); } /// Check that we can symbolize addresses inside our own process.