diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 299af1d7..b79e23fb 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -79,6 +79,13 @@ jobs: command: build args: --features flamegraph,protobuf-codec --target ${{ matrix.target }} + - name: Run cargo build frame pointer + if: ${{ matrix.toolchain == 'nightly' && matrix.os == 'ubuntu-latest' }} + uses: actions-rs/cargo@v1.0.3 + with: + command: build + args: --no-default-features --features frame-pointer --target ${{ matrix.target }} + test: name: Test strategy: diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fecc12d..149ed25f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add `frame-pointer` feature to unwind the stack with frame pointer (#116) + +### Changed +- The user has to specify one unwind implementation (`backtrace-rs` or `frame-pointer`) in the features (#116) + ## [0.8.0] - 2022-04-20 ### Changed diff --git a/Cargo.toml b/Cargo.toml index 45b54b57..bf8c4e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,16 +10,21 @@ documentation = "https://docs.rs/pprof/" readme = "README.md" [features] -default = ["cpp"] +default = ["cpp", "backtrace-rs"] flamegraph = ["inferno"] + # A private feature to indicate either prost-codec or protobuf-codec is enabled. 
_protobuf = [] prost-codec = ["prost", "prost-derive", "prost-build", "_protobuf"] protobuf-codec = ["protobuf", "protobuf-codegen-pure", "_protobuf"] + +backtrace-rs = ["backtrace"] +frame-pointer = ["backtrace"] + cpp = ["symbolic-demangle/cpp"] [dependencies] -backtrace = "0.3" +backtrace = { version = "0.3", optional = true } once_cell = "1.9" libc = "^0.2.66" log = "0.4" @@ -71,5 +76,10 @@ name = "collector" path = "benches/collector.rs" harness = false +[[bench]] +name = "addr_validate" +path = "benches/addr_validate.rs" +harness = false + [package.metadata.docs.rs] all-features = true diff --git a/README.md b/README.md index 2aaeb6d2..840b6aa8 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,8 @@ FRAME: backtrace::backtrace::trace::h3e91a3123a3049a5 -> FRAME: pprof::profiler: - `flamegraph` enables the flamegraph report format. - `prost-codec` enables the pprof protobuf report format through `prost`. - `protobuf-codec` enables the pprof protobuf report format through `protobuf` crate. +- `backtrace-rs` unwinds the stack through `backtrace-rs` (which calls `_Unwind_Backtrace`). +- `frame-pointer` gets the backtrace through the frame pointer. **Only available on nightly.** ## Flamegraph @@ -222,6 +224,12 @@ let guard = pprof::ProfilerGuardBuilder::default().frequency(1000).blocklist(&[" The `vdso` should also be added to the blocklist, because in some distribution (e.g. ubuntu 18.04), the dwarf information in vdso is incorrect. +### Frame Pointer + +`pprof-rs` also supports unwinding through the frame pointer, without the need to use `libunwind`. However, the standard library shipped with the Rust compiler does not have the correct frame pointer in every function, so you need to use `cargo +nightly -Z build-std` to build the standard library from source. + +As we cannot get the stack boundaries inside the signal handler, it is also not possible to ensure safety. If the frame pointer is set to a wrong value, the program will panic. 
+ ### Signal Safety Signal safety is hard to guarantee. But it's not *that* hard. diff --git a/benches/addr_validate.rs b/benches/addr_validate.rs new file mode 100644 index 00000000..4154f33f --- /dev/null +++ b/benches/addr_validate.rs @@ -0,0 +1,29 @@ +// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. + +use criterion::{criterion_group, criterion_main, Criterion}; +use pprof::validate; + +fn bench_validate_addr(c: &mut Criterion) { + c.bench_function("validate stack addr", |b| { + let stack_addrs = [0; 100]; + + b.iter(|| { + stack_addrs.iter().for_each(|item| { + validate(item as *const _ as *const libc::c_void); + }) + }) + }); + + c.bench_function("validate heap addr", |b| { + let heap_addrs = vec![0; 100]; + + b.iter(|| { + heap_addrs.iter().for_each(|item| { + validate(item as *const _ as *const libc::c_void); + }) + }) + }); +} + +criterion_group!(benches, bench_validate_addr); +criterion_main!(benches); diff --git a/src/addr_validate.rs b/src/addr_validate.rs new file mode 100644 index 00000000..10dc27c4 --- /dev/null +++ b/src/addr_validate.rs @@ -0,0 +1,122 @@ +use std::{cell::RefCell, mem::size_of}; + +use nix::{ + errno::Errno, + unistd::{close, read, write}, +}; + +thread_local! 
{ + static MEM_VALIDATE_PIPE: RefCell<[i32; 2]> = RefCell::new([-1, -1]); +} + +#[inline] +#[cfg(target_os = "linux")] +fn create_pipe() -> nix::Result<(i32, i32)> { + use nix::fcntl::OFlag; + use nix::unistd::pipe2; + + pipe2(OFlag::O_CLOEXEC | OFlag::O_NONBLOCK) +} + +#[inline] +#[cfg(target_os = "macos")] +fn create_pipe() -> nix::Result<(i32, i32)> { + use nix::fcntl::{fcntl, FcntlArg, FdFlag, OFlag}; + use nix::unistd::pipe; + use std::os::unix::io::RawFd; + + fn set_flags(fd: RawFd) -> nix::Result<()> { + let mut flags = FdFlag::from_bits(fcntl(fd, FcntlArg::F_GETFD)?).unwrap(); + flags |= FdFlag::FD_CLOEXEC; + fcntl(fd, FcntlArg::F_SETFD(flags))?; + let mut flags = OFlag::from_bits(fcntl(fd, FcntlArg::F_GETFL)?).unwrap(); + flags |= OFlag::O_NONBLOCK; + fcntl(fd, FcntlArg::F_SETFL(flags))?; + Ok(()) + } + + let (read_fd, write_fd) = pipe()?; + set_flags(read_fd)?; + set_flags(write_fd)?; + Ok((read_fd, write_fd)) +} + +fn open_pipe() -> nix::Result<()> { + MEM_VALIDATE_PIPE.with(|pipes| { + let mut pipes = pipes.borrow_mut(); + + // ignore the result + let _ = close(pipes[0]); + let _ = close(pipes[1]); + + let (read_fd, write_fd) = create_pipe()?; + + pipes[0] = read_fd; + pipes[1] = write_fd; + + Ok(()) + }) +} + +pub fn validate(addr: *const libc::c_void) -> bool { + const CHECK_LENGTH: usize = 2 * size_of::<*const libc::c_void>() / size_of::(); + + // read data in the pipe + let valid_read = MEM_VALIDATE_PIPE.with(|pipes| { + let pipes = pipes.borrow(); + loop { + let mut buf = [0u8; CHECK_LENGTH]; + + match read(pipes[0], &mut buf) { + Ok(bytes) => break bytes > 0, + Err(_err @ Errno::EINTR) => continue, + Err(_err @ Errno::EAGAIN) => break true, + Err(_) => break false, + } + } + }); + + if !valid_read && open_pipe().is_err() { + return false; + } + + MEM_VALIDATE_PIPE.with(|pipes| { + let pipes = pipes.borrow(); + loop { + let buf = unsafe { std::slice::from_raw_parts(addr as *const u8, CHECK_LENGTH) }; + + match write(pipes[1], buf) { + Ok(bytes) => 
break bytes > 0, + Err(_err @ Errno::EINTR) => continue, + Err(_) => break false, + } + } + }) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn validate_stack() { + let i = 0; + + assert_eq!(validate(&i as *const _ as *const libc::c_void), true); + } + + #[test] + fn validate_heap() { + let vec = vec![0; 1000]; + + for i in vec.iter() { + assert_eq!(validate(i as *const _ as *const libc::c_void), true); + } + } + + #[test] + fn failed_validate() { + assert_eq!(validate(0 as *const libc::c_void), false); + assert_eq!(validate((-1 as i32) as usize as *const libc::c_void), false) + } +} diff --git a/src/backtrace/backtrace_rs.rs b/src/backtrace/backtrace_rs.rs new file mode 100644 index 00000000..1b7e3663 --- /dev/null +++ b/src/backtrace/backtrace_rs.rs @@ -0,0 +1,28 @@ +impl super::Frame for backtrace::Frame { + type S = backtrace::Symbol; + + fn ip(&self) -> usize { + self.ip() as usize + } + + fn resolve_symbol(&self, cb: F) { + backtrace::resolve_frame(self, cb); + } + + fn symbol_address(&self) -> *mut libc::c_void { + self.symbol_address() + } +} + +pub struct Trace {} + +impl super::Trace for Trace { + type Frame = backtrace::Frame; + + fn trace bool>(_: *mut libc::c_void, cb: F) { + unsafe { backtrace::trace_unsynchronized(cb) } + } +} + +pub use backtrace::Frame; +pub use backtrace::Symbol; diff --git a/src/backtrace/frame_pointer.rs b/src/backtrace/frame_pointer.rs new file mode 100644 index 00000000..1810e4a0 --- /dev/null +++ b/src/backtrace/frame_pointer.rs @@ -0,0 +1,116 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::ptr::null_mut; + +use libc::c_void; + +use crate::addr_validate::validate; + +#[derive(Clone, Debug)] +pub struct Frame { + pub ip: usize, +} + +extern "C" { + fn _Unwind_FindEnclosingFunction(pc: *mut c_void) -> *mut c_void; + +} + +impl super::Frame for Frame { + type S = backtrace::Symbol; + + fn ip(&self) -> usize { + self.ip + } + + fn resolve_symbol(&self, cb: F) { + backtrace::resolve(self.ip as *mut c_void, cb); + } + + fn symbol_address(&self) -> *mut libc::c_void { + if cfg!(target_os = "macos") || cfg!(target_os = "ios") { + self.ip as *mut c_void + } else { + unsafe { _Unwind_FindEnclosingFunction(self.ip as *mut c_void) } + } + } +} + +pub struct Trace {} +impl super::Trace for Trace { + type Frame = Frame; + + fn trace bool>(ucontext: *mut libc::c_void, mut cb: F) { + let ucontext: *mut libc::ucontext_t = ucontext as *mut libc::ucontext_t; + if ucontext.is_null() { + return; + } + + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + let frame_pointer = + unsafe { (*ucontext).uc_mcontext.gregs[libc::REG_RBP as usize] as usize }; + + #[cfg(all(target_arch = "x86_64", target_os = "macos"))] + let frame_pointer = unsafe { + let mcontext = (*ucontext).uc_mcontext; + if mcontext.is_null() { + 0 + } else { + (*mcontext).__ss.__rbp as usize + } + }; + + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + let frame_pointer = unsafe { (*ucontext).uc_mcontext.regs[29] as usize }; + + #[cfg(all(target_arch = "aarch64", target_os = "macos"))] + let frame_pointer = unsafe { + let mcontext = (*ucontext).uc_mcontext; + if mcontext.is_null() { + 0 + } else { + (*mcontext).__ss.__fp as usize + } + }; + + let mut frame_pointer = frame_pointer as *mut FramePointerLayout; + + let mut last_frame_pointer: *mut FramePointerLayout = null_mut(); + loop { + // The stack grow from high address to low address. 
+ // but we don't have a reasonable assumption for the highest address. + // `__libc_stack_end` is not thread-local, and only represents the + // stack end of the main thread. For other threads, their stacks are allocated + // by `pthread`. + // + // TODO: If we can hook the thread creation, we will have a chance to get the + // stack end through `pthread_getattr_np`. + + // the frame pointer should never be smaller than the former one. + if !last_frame_pointer.is_null() && frame_pointer < last_frame_pointer { + break; + } + + if !validate(frame_pointer as *const libc::c_void) { + break; + } + last_frame_pointer = frame_pointer; + + // iterate to the next frame + let frame = Frame { + ip: unsafe { (*frame_pointer).ret }, + }; + + if !cb(&frame) { + break; + } + frame_pointer = unsafe { (*frame_pointer).frame_pointer }; + } + } +} + +#[repr(C)] +struct FramePointerLayout { + frame_pointer: *mut FramePointerLayout, + ret: usize, +} diff --git a/src/backtrace/mod.rs b/src/backtrace/mod.rs new file mode 100644 index 00000000..f838e858 --- /dev/null +++ b/src/backtrace/mod.rs @@ -0,0 +1,61 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use libc::c_void; +use std::path::PathBuf; + +pub trait Symbol: Sized { + fn name(&self) -> Option>; + fn addr(&self) -> Option<*mut c_void>; + fn lineno(&self) -> Option; + fn filename(&self) -> Option; +} + +impl Symbol for backtrace::Symbol { + fn name(&self) -> Option> { + self.name().map(|name| name.as_bytes().to_vec()) + } + + fn addr(&self) -> Option<*mut libc::c_void> { + self.addr() + } + + fn lineno(&self) -> Option { + self.lineno() + } + + fn filename(&self) -> Option { + self.filename().map(|filename| filename.to_owned()) + } +} + +pub trait Frame: Sized + Clone { + type S: Symbol; + + fn resolve_symbol(&self, cb: F); + fn symbol_address(&self) -> *mut c_void; + fn ip(&self) -> usize; +} + +pub trait Trace { + type Frame; + + fn trace bool>(_: *mut libc::c_void, cb: F) + where + Self: Sized; +} + +#[cfg(feature = "backtrace-rs")] +mod backtrace_rs; +#[cfg(feature = "backtrace-rs")] +pub use backtrace_rs::Trace as TraceImpl; + +#[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + feature = "frame-pointer" +))] +pub mod frame_pointer; +#[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + feature = "frame-pointer" +))] +pub use frame_pointer::Trace as TraceImpl; diff --git a/src/frames.rs b/src/frames.rs index acac2d1d..83ed61a9 100644 --- a/src/frames.rs +++ b/src/frames.rs @@ -6,15 +6,15 @@ use std::hash::{Hash, Hasher}; use std::os::raw::c_void; use std::path::PathBuf; -use backtrace::Frame; use smallvec::SmallVec; use symbolic_demangle::demangle; +use crate::backtrace::{Frame, Trace, TraceImpl}; use crate::{MAX_DEPTH, MAX_THREAD_NAME}; #[derive(Clone)] pub struct UnresolvedFrames { - pub frames: SmallVec<[Frame; MAX_DEPTH]>, + pub frames: SmallVec<[::Frame; MAX_DEPTH]>, pub thread_name: [u8; MAX_THREAD_NAME], pub thread_name_length: usize, pub thread_id: u64, @@ -39,7 +39,11 @@ impl Debug for UnresolvedFrames { } impl UnresolvedFrames { - pub fn new(frames: SmallVec<[Frame; MAX_DEPTH]>, tn: &[u8], thread_id: u64) -> 
Self { + pub fn new( + frames: SmallVec<[::Frame; MAX_DEPTH]>, + tn: &[u8], + thread_id: u64, + ) -> Self { let thread_name_length = tn.len(); let mut thread_name = [0; MAX_THREAD_NAME]; thread_name[0..thread_name_length].clone_from_slice(tn); @@ -96,7 +100,7 @@ pub struct Symbol { impl Symbol { pub fn raw_name(&self) -> &[u8] { - self.name.as_deref().unwrap_or(b"Unknow") + self.name.as_deref().unwrap_or(b"Unknown") } pub fn name(&self) -> String { @@ -111,7 +115,7 @@ impl Symbol { self.filename .as_ref() .map(|name| name.as_os_str().to_string_lossy()) - .unwrap_or_else(|| Cow::Borrowed("Unknow")) + .unwrap_or_else(|| Cow::Borrowed("Unknown")) } pub fn lineno(&self) -> u32 { @@ -121,13 +125,16 @@ impl Symbol { unsafe impl Send for Symbol {} -impl From<&backtrace::Symbol> for Symbol { - fn from(symbol: &backtrace::Symbol) -> Self { +impl From<&T> for Symbol +where + T: crate::backtrace::Symbol, +{ + fn from(symbol: &T) -> Self { Symbol { - name: symbol.name().map(|name| name.as_bytes().to_vec()), + name: symbol.name(), addr: symbol.addr(), lineno: symbol.lineno(), - filename: symbol.filename().map(|filename| filename.to_owned()), + filename: symbol.filename(), } } } @@ -177,9 +184,9 @@ impl From for Frames { let mut frame_iter = frames.frames.iter(); while let Some(frame) = frame_iter.next() { - let mut symbols = Vec::new(); + let mut symbols: Vec = Vec::new(); - backtrace::resolve_frame(frame, |symbol| { + frame.resolve_symbol(|symbol| { let symbol = Symbol::from(symbol); symbols.push(symbol); }); diff --git a/src/lib.rs b/src/lib.rs index c54e9b09..2024f654 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,9 @@ pub const MAX_DEPTH: usize = 32; /// Define the MAX supported thread name length. TODO: make this variable mutable. 
pub const MAX_THREAD_NAME: usize = 16; +mod addr_validate; + +mod backtrace; mod collector; mod error; mod frames; @@ -52,6 +55,7 @@ mod profiler; mod report; mod timer; +pub use self::addr_validate::validate; pub use self::collector::{Collector, HashCounter}; pub use self::error::{Error, Result}; pub use self::frames::{Frames, Symbol}; diff --git a/src/profiler.rs b/src/profiler.rs index d535e942..0012e245 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -3,7 +3,6 @@ use std::convert::TryInto; use std::os::raw::c_int; -use backtrace::Frame; use nix::sys::signal; use once_cell::sync::Lazy; use parking_lot::RwLock; @@ -12,6 +11,7 @@ use smallvec::SmallVec; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use findshlibs::{Segment, SharedLibrary, TargetSharedLibrary}; +use crate::backtrace::{Frame, Trace, TraceImpl}; use crate::collector::Collector; use crate::error::{Error, Result}; use crate::frames::UnresolvedFrames; @@ -54,6 +54,7 @@ impl ProfilerGuardBuilder { pub fn frequency(self, frequency: c_int) -> Self { Self { frequency, ..self } } + #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))] pub fn blocklist>(self, blocklist: &[T]) -> Self { let blocklist_segments = { @@ -124,7 +125,7 @@ pub struct ProfilerGuard<'a> { fn trigger_lazy() { let _ = backtrace::Backtrace::new(); - let _lock = PROFILER.read(); + let _profiler = PROFILER.read(); } impl ProfilerGuard<'_> { @@ -181,12 +182,12 @@ fn write_thread_name_fallback(current_thread: libc::pthread_t, name: &mut [libc: } } -#[cfg(not(all(any(target_os = "linux", target_os = "macos"), target_env = "gnu")))] +#[cfg(not(any(target_os = "linux", target_os = "macos")))] fn write_thread_name(current_thread: libc::pthread_t, name: &mut [libc::c_char]) { write_thread_name_fallback(current_thread, name); } -#[cfg(all(any(target_os = "linux", target_os = "macos"), target_env = "gnu"))] +#[cfg(any(target_os = "linux", target_os = "macos"))] fn write_thread_name(current_thread: libc::pthread_t, 
name: &mut [libc::c_char]) { let name_ptr = name as *mut [libc::c_char] as *mut libc::c_char; let ret = unsafe { libc::pthread_getname_np(current_thread, name_ptr, MAX_THREAD_NAME) }; @@ -208,7 +209,7 @@ extern "C" fn perf_signal_handler( ) { if let Some(mut guard) = PROFILER.try_write() { if let Ok(profiler) = guard.as_mut() { - #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if !ucontext.is_null() { let ucontext: *mut libc::ucontext_t = ucontext as *mut libc::ucontext_t; @@ -244,20 +245,23 @@ extern "C" fn perf_signal_handler( } } - let mut bt: SmallVec<[Frame; MAX_DEPTH]> = SmallVec::with_capacity(MAX_DEPTH); + let mut bt: SmallVec<[::Frame; MAX_DEPTH]> = + SmallVec::with_capacity(MAX_DEPTH); let mut index = 0; + TraceImpl::trace(ucontext, |frame| { + let ip = Frame::ip(frame); + if profiler.is_blocklisted(ip) { + return false; + } - unsafe { - backtrace::trace_unsynchronized(|frame| { - if index < MAX_DEPTH { - bt.push(frame.clone()); - index += 1; - true - } else { - false - } - }); - } + if index < MAX_DEPTH { + bt.push(frame.clone()); + index += 1; + true + } else { + false + } + }); let current_thread = unsafe { libc::pthread_self() }; let mut name = [0; MAX_THREAD_NAME]; @@ -349,7 +353,7 @@ impl Profiler { // This function has to be AS-safe pub fn sample( &mut self, - backtrace: SmallVec<[Frame; MAX_DEPTH]>, + backtrace: SmallVec<[::Frame; MAX_DEPTH]>, thread_name: &[u8], thread_id: u64, ) {