Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use /proc/self/maps when available instead of std::env::current_exe #488

5 changes: 5 additions & 0 deletions Cargo.toml
Expand Up @@ -130,3 +130,8 @@ edition = '2018'
name = "concurrent-panics"
required-features = ["std"]
harness = false

[[test]]
name = "current-exe-mismatch"
required-features = ["std"]
harness = false
2 changes: 2 additions & 0 deletions src/symbolize/gimli.rs
Expand Up @@ -184,6 +184,8 @@ cfg_if::cfg_if! {
))] {
mod libs_dl_iterate_phdr;
use libs_dl_iterate_phdr::native_libraries;
#[path = "gimli/parse_running_mmaps_unix.rs"]
mod parse_running_mmaps;
} else if #[cfg(target_env = "libnx")] {
mod libs_libnx;
use libs_libnx::native_libraries;
Expand Down
20 changes: 19 additions & 1 deletion src/symbolize/gimli/libs_dl_iterate_phdr.rs
Expand Up @@ -17,6 +17,20 @@ pub(super) fn native_libraries() -> Vec<Library> {
return ret;
}

fn infer_current_exe(base_addr: usize) -> OsString {
if let Ok(entries) = super::parse_running_mmaps::parse_maps() {
let opt_path = entries
.iter()
.find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0)
.map(|e| e.pathname())
.cloned();
if let Some(path) = opt_path {
return path;
}
}
env::current_exe().map(|e| e.into()).unwrap_or_default()
}

// `info` should be a valid pointers.
// `vec` should be a valid pointer to a `std::Vec`.
unsafe extern "C" fn callback(
Expand All @@ -28,8 +42,12 @@ unsafe extern "C" fn callback(
let libs = &mut *(vec as *mut Vec<Library>);
let is_main_prog = info.dlpi_name.is_null() || *info.dlpi_name == 0;
let name = if is_main_prog {
// The man page for dl_iterate_phdr says that the first object visited by
// callback is the main program; so the first time we encounter a
// nameless entry, we can assume its the main program and try to infer its path.
// After that, we cannot continue that assumption, and we use an empty string.
if libs.is_empty() {
env::current_exe().map(|e| e.into()).unwrap_or_default()
infer_current_exe(info.dlpi_addr as usize)
} else {
OsString::new()
}
Expand Down
242 changes: 242 additions & 0 deletions src/symbolize/gimli/parse_running_mmaps_unix.rs
@@ -0,0 +1,242 @@
// Note: This file is only currently used on targets that call out to the code
// in `mod libs_dl_iterate_phdr` (e.g. linux, freebsd, ...); it may be more
// general purpose, but it hasn't been tested elsewhere.

use super::mystd::fs::File;
use super::mystd::io::Read;
use super::mystd::str::FromStr;
use super::{OsString, String, Vec};

#[derive(PartialEq, Eq, Debug)]
pub(super) struct MapsEntry {
/// start (inclusive) and limit (exclusive) of address range.
address: (usize, usize),
/// The perms field are the permissions for the entry
///
/// r = read
/// w = write
/// x = execute
/// s = shared
/// p = private (copy on write)
perms: [char; 4],
/// Offset into the file (or "whatever").
offset: usize,
/// device (major, minor)
dev: (usize, usize),
/// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitalized data aka BSS).
inode: usize,
/// Usually the file backing the mapping.
///
/// Note: The man page for proc includes a note about "coordination" by
/// using readelf to see the Offset field in ELF program headers. pnkfelix
/// is not yet sure if that is intended to be a comment on pathname, or what
/// form/purpose such coordination is meant to have.
///
/// There are also some pseudo-paths:
/// "[stack]": The initial process's (aka main thread's) stack.
/// "[stack:<tid>]": a specific thread's stack. (This was only present for a limited range of Linux verisons; it was determined to be too expensive to provide.)
/// "[vdso]": Virtual dynamically linked shared object
/// "[heap]": The process's heap
///
/// The pathname can be blank, which means it is an anonymous mapping
/// obtained via mmap.
///
/// Newlines in pathname are replaced with an octal escape sequence.
///
/// The pathname may have "(deleted)" appended onto it if the file-backed
/// path has been deleted.
///
/// Note that modifications like the latter two indicated above imply that
/// in general the pathname may be ambiguous. (I.e. you cannot tell if the
/// denoted filename actually ended with the text "(deleted)", or if that
/// was added by the maps rendering.
pathname: OsString,
}

pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic only applies to linux I think. Netbsd has /proc/curproc/. macOS doesn't have /proc at all. As for other unixes I'm not quite sure.

Copy link
Member Author

@pnkfelix pnkfelix Oct 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy to revise this so that only Linux pulls in the "interesting" parse_maps file, and every other target just goes to the no-op variant.

(I was hoping that I had gotten the logic right such that targets that didn't have the /proc/self/maps pseudo-file, or if it failed to parse said file, it would silently just fall back on std::env::current_exe, but I haven't confirmed that yet.)

Copy link
Member Author

@pnkfelix pnkfelix Oct 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked again, and realized that the sole calling module is solely defined here:

} else if #[cfg(all(
any(
target_os = "linux",
target_os = "fuchsia",
target_os = "freebsd",
target_os = "openbsd",
all(target_os = "android", feature = "dl_iterate_phdr"),
),
not(target_env = "uclibc"),
))] {
mod libs_dl_iterate_phdr;

That makes this a lot easier to limit to just these small subset of targets.

I force-pushed an update that fixes that (and backtraces my API to a simpler one now enabled by removing the parse_running_mmaps_noop.rs code).

let mut v = Vec::new();
let mut proc_self_maps =
File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?;
let mut buf = String::new();
let _bytes_read = proc_self_maps
.read_to_string(&mut buf)
.map_err(|_| "Couldn't read /proc/self/maps")?;
for line in buf.lines() {
v.push(line.parse()?);
}

Ok(v)
}

impl MapsEntry {
pub(super) fn pathname(&self) -> &OsString {
&self.pathname
}

pub(super) fn ip_matches(&self, ip: usize) -> bool {
self.address.0 <= ip && ip < self.address.1
}
}

impl FromStr for MapsEntry {
type Err = &'static str;

// Format: address perms offset dev inode pathname
// e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]"
// e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
// e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0"
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s
.split(' ') // space-separated fields
.filter(|s| s.len() > 0); // multiple spaces implies empty strings that need to be skipped.
let range_str = parts.next().ok_or("Couldn't find address")?;
let perms_str = parts.next().ok_or("Couldn't find permissions")?;
let offset_str = parts.next().ok_or("Couldn't find offset")?;
let dev_str = parts.next().ok_or("Couldn't find dev")?;
let inode_str = parts.next().ok_or("Couldn't find inode")?;
let pathname_str = parts.next().unwrap_or(""); // pathname may be omitted.

let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
let address = {
// This could use `range_str.split_once('-')` once the MSRV passes 1.52.
if let Some(idx) = range_str.find('-') {
let (start, rest) = range_str.split_at(idx);
let (_div, limit) = rest.split_at(1);
(hex(start)?, hex(limit)?)
} else {
return Err("Couldn't parse address range");
}
};
let perms: [char; 4] = {
let mut chars = perms_str.chars();
let mut c = || chars.next().ok_or("insufficient perms");
let perms = [c()?, c()?, c()?, c()?];
if chars.next().is_some() {
return Err("too many perms");
}
perms
};
let offset = hex(offset_str)?;
let dev = {
// This could use `dev_str.split_once(':')` once the MSRV passes 1.52.
if let Some(idx) = dev_str.find(':') {
let (major, rest) = dev_str.split_at(idx);
let (_div, minor) = rest.split_at(1);
(hex(major)?, hex(minor)?)
} else {
return Err("Couldn't parse dev")?;
}
};
let inode = hex(inode_str)?;
let pathname = pathname_str.into();

Ok(MapsEntry {
address,
perms,
offset,
dev,
inode,
pathname,
})
}
}

// Make sure we can parse 64-bit sample output if we're on a 64-bit target.
#[cfg(target_pointer_width = "64")]
#[test]
fn check_maps_entry_parsing_64bit() {
assert_eq!(
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 \
[vsyscall]"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0xffffffffff600000, 0xffffffffff601000),
perms: ['-', '-', 'x', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: "[vsyscall]".into(),
}
);

assert_eq!(
"7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 \
/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0x7f5985f46000, 0x7f5985f48000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00039000,
dev: (0x103, 0x06),
inode: 0x76021795,
pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(),
}
);
assert_eq!(
"35b1a21000-35b1a22000 rw-p 00000000 00:00 0"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0x35b1a21000, 0x35b1a22000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: Default::default(),
}
);
}

// (This output was taken from a 32-bit machine, but will work on any target)
#[test]
fn check_maps_entry_parsing_32bit() {
/* Example snippet of output:
08056000-08077000 rw-p 00000000 00:00 0 [heap]
b7c79000-b7e02000 r--p 00000000 08:01 60662705 /usr/lib/locale/locale-archive
b7e02000-b7e03000 rw-p 00000000 00:00 0
*/
assert_eq!(
"08056000-08077000 rw-p 00000000 00:00 0 \
[heap]"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0x08056000, 0x08077000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: "[heap]".into(),
}
);

assert_eq!(
"b7c79000-b7e02000 r--p 00000000 08:01 60662705 \
/usr/lib/locale/locale-archive"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0xb7c79000, 0xb7e02000),
perms: ['r', '-', '-', 'p'],
offset: 0x00000000,
dev: (0x08, 0x01),
inode: 0x60662705,
pathname: "/usr/lib/locale/locale-archive".into(),
}
);
assert_eq!(
"b7e02000-b7e03000 rw-p 00000000 00:00 0"
.parse::<MapsEntry>()
.unwrap(),
MapsEntry {
address: (0xb7e02000, 0xb7e03000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: Default::default(),
}
);
}
14 changes: 14 additions & 0 deletions tests/common/mod.rs
@@ -0,0 +1,14 @@
/// Some tests only make sense in contexts where they can re-exec the test
/// itself. Not all contexts support this, so you can call this method to find
/// out which case you are in.
pub fn cannot_reexec_the_test() -> bool {
// These run in docker containers on CI where they can't re-exec the test,
// so just skip these for CI. No other reason this can't run on those
// platforms though.
// Miri does not have support for re-execing a file
cfg!(unix)
&& (cfg!(target_arch = "arm")
|| cfg!(target_arch = "aarch64")
|| cfg!(target_arch = "s390x"))
|| cfg!(miri)
}
14 changes: 4 additions & 10 deletions tests/concurrent-panics.rs
Expand Up @@ -9,17 +9,11 @@ const PANICS: usize = 100;
const THREADS: usize = 8;
const VAR: &str = "__THE_TEST_YOU_ARE_LUKE";

mod common;

fn main() {
// These run in docker containers on CI where they can't re-exec the test,
// so just skip these for CI. No other reason this can't run on those
// platforms though.
// Miri does not have support for re-execing a file
if cfg!(unix)
&& (cfg!(target_arch = "arm")
|| cfg!(target_arch = "aarch64")
|| cfg!(target_arch = "s390x"))
|| cfg!(miri)
{
// If we cannot re-exec this test, there's no point in trying to do it.
if common::cannot_reexec_the_test() {
println!("test result: ok");
return;
}
Expand Down