Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the definition of sigevent #2813

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion ci/style.rs
Expand Up @@ -177,7 +177,15 @@ fn check_style(file: &str, path: &Path, err: &mut Errors) {
}
if s_macros == 2 {
s_macros += 1;
err.error(path, i, "multiple s! macros in one module");
// Can't enforce this rule until after raising the MSRV to 1.19.0 or
// later. It seems that earlier Rust versions ignore #[cfg()]
// attributes within the macro. As a result, it's sometimes
// necessary to have multiple s!{} macros in a single file. It's
// hard to debug, because cargo-expand doesn't work with such
// old versions.
// See also https://github.com/rust-lang/libc/pull/2813

// err.error(path, i, "multiple s! macros in one module");
}

state = line_state;
Expand Down
32 changes: 27 additions & 5 deletions libc-test/build.rs
Expand Up @@ -1477,7 +1477,7 @@ fn test_dragonflybsd(target: &str) {
// just insert some padding.
(struct_ == "siginfo_t" && field == "_pad") ||
// sigev_notify_thread_id is actually part of a sigev_un union
(struct_ == "sigevent" && field == "sigev_notify_thread_id")
(struct_ == "sigevent_0_2_126" && field == "sigev_notify_thread_id")
});

cfg.generate("../src/lib.rs", "main.rs");
Expand Down Expand Up @@ -1724,6 +1724,8 @@ fn test_android(target: &str) {
// sigval is a struct in Rust, but a union in C:
"sigval" => format!("union sigval"),

"sigevent_0_2_126" => "struct sigevent".to_string(),

// put `struct` in front of all structs:.
t if is_struct => format!("struct {}", t),

Expand Down Expand Up @@ -1970,6 +1972,8 @@ fn test_android(target: &str) {
(struct_ == "sigevent" && field == "sigev_value") ||
// this one is an anonymous union
(struct_ == "ff_effect" && field == "u") ||
(struct_ == "sigevent_0_2_126" && field == "sigev_value") ||
(struct_ == "sigevent" && field == "_sigev_un") ||
// FIXME: `sa_sigaction` has type `sighandler_t` but that type is
// incorrect, see: https://github.com/rust-lang/libc/issues/1359
(struct_ == "sigaction" && field == "sa_sigaction") ||
Expand Down Expand Up @@ -2163,6 +2167,8 @@ fn test_freebsd(target: &str) {
// FIXME: https://github.com/rust-lang/libc/issues/1273
"sighandler_t" => "sig_t".to_string(),

"sigevent_0_2_126" => "struct sigevent".to_string(),

t if is_union => format!("union {}", t),

t if t.ends_with("_t") => t.to_string(),
Expand Down Expand Up @@ -2574,6 +2580,7 @@ fn test_freebsd(target: &str) {
("if_data", "__ifi_lastchange") => true,
("ifreq", "ifr_ifru") => true,
("ifconf", "ifc_ifcu") => true,
("sigevent", "_sigev_un") => true,

// anonymous struct
("devstat", "dev_links") => true,
Expand Down Expand Up @@ -2722,6 +2729,8 @@ fn test_emscripten(target: &str) {
// typedefs don't need any keywords
t if t.ends_with("_t") => t.to_string(),

"sigevent_0_2_126" => "struct sigevent".to_string(),

// put `struct` in front of all structs:.
t if is_struct => format!("struct {}", t),

Expand Down Expand Up @@ -2761,6 +2770,9 @@ fn test_emscripten(target: &str) {
});

cfg.skip_struct(move |ty| {
if ty.starts_with("__c_anonymous_") {
return true;
}
match ty {
// This is actually a union, not a struct
// FIXME: is this necessary?
Expand Down Expand Up @@ -2847,6 +2859,7 @@ fn test_emscripten(target: &str) {
// sigval is actually a union, but we pretend it's a struct
// FIXME: is this necessary?
(struct_ == "sigevent" && field == "sigev_value") ||
(struct_ == "sigevent_0_2_126" && field == "sigev_value") ||
// aio_buf is "volatile void*" and Rust doesn't understand volatile
// FIXME: is this necessary?
(struct_ == "aiocb" && field == "aio_buf")
Expand All @@ -2863,8 +2876,10 @@ fn test_emscripten(target: &str) {
// musl seems to define this as an *anonymous* bitfield
// FIXME: is this necessary?
(struct_ == "statvfs" && field == "__f_unused") ||
// sigev_notify_thread_id is actually part of a sigev_un union
(struct_ == "sigevent" && field == "sigev_notify_thread_id") ||
// union field
(struct_ == "sigevent" && field == "_sigev_un") ||
// union field on the backwards-compat struct definition
(struct_ == "sigevent_0_2_126" && field == "sigev_notify_thread_id") ||
// signalfd had SIGSYS fields added in Linux 4.18, but no libc release has them yet.
(struct_ == "signalfd_siginfo" && (field == "ssi_addr_lsb" ||
field == "_pad2" ||
Expand Down Expand Up @@ -3486,6 +3501,9 @@ fn test_linux(target: &str) {

// typedefs don't need any keywords
t if t.ends_with("_t") => t.to_string(),

"sigevent_0_2_126" => "struct sigevent".to_string(),

// put `struct` in front of all structs:.
t if is_struct => format!("struct {}", t),
// put `union` in front of all unions:
Expand Down Expand Up @@ -4178,6 +4196,7 @@ fn test_linux(target: &str) {
(struct_ == "utmpx" && field == "ut_tv") ||
// sigval is actually a union, but we pretend it's a struct
(struct_ == "sigevent" && field == "sigev_value") ||
(struct_ == "sigevent_0_2_126" && field == "sigev_value") ||
// this one is an anonymous union
(struct_ == "ff_effect" && field == "u") ||
// `__exit_status` type is a patch which is absent in musl
Expand All @@ -4204,8 +4223,10 @@ fn test_linux(target: &str) {
(musl && struct_ == "glob_t" && field == "gl_flags") ||
// musl seems to define this as an *anonymous* bitfield
(musl && struct_ == "statvfs" && field == "__f_unused") ||
// sigev_notify_thread_id is actually part of a sigev_un union
(struct_ == "sigevent" && field == "sigev_notify_thread_id") ||
// union field
(struct_ == "sigevent" && field == "_sigev_un") ||
// union field on the backwards-compat struct definition
(struct_ == "sigevent_0_2_126" && field == "sigev_notify_thread_id") ||
// signalfd had SIGSYS fields added in Linux 4.18, but no libc release
// has them yet.
(struct_ == "signalfd_siginfo" && (field == "ssi_addr_lsb" ||
Expand Down Expand Up @@ -4717,6 +4738,7 @@ fn test_haiku(target: &str) {
("sem_t", "named_sem_id") => true,
("sigaction", "sa_sigaction") => true,
("sigevent", "sigev_value") => true,
("sigevent_0_2_126", "sigev_value") => true,
("fpu_state", "_fpreg") => true,
("cpu_topology_node_info", "data") => true,
// these fields have a simplified data definition in libc
Expand Down
166 changes: 144 additions & 22 deletions src/unix/bsd/freebsdlike/freebsd/mod.rs
Expand Up @@ -1393,18 +1393,6 @@ s_no_extra_traits! {
__reserved: [::c_long; 4]
}

pub struct sigevent {
pub sigev_notify: ::c_int,
pub sigev_signo: ::c_int,
pub sigev_value: ::sigval,
//The rest of the structure is actually a union. We expose only
//sigev_notify_thread_id because it's the most useful union member.
pub sigev_notify_thread_id: ::lwpid_t,
#[cfg(target_pointer_width = "64")]
__unused1: ::c_int,
__unused2: [::c_long; 7]
}

pub struct ptsstat {
#[cfg(any(freebsd12, freebsd13, freebsd14))]
pub dev: u64,
Expand Down Expand Up @@ -1637,6 +1625,95 @@ s_no_extra_traits! {
}
}

#[cfg(libc_union)]
s! {
pub struct __c_anonymous_sigev_thread {
pub _function: *mut ::c_void, // Actually a function pointer
pub _attribute: *mut ::pthread_attr_t,
}

// When sigevent was first added to libc, Rust still didn't support unions.
// So the definition only included one of the union's member. This
// structure exists for backwards-compatibility with consumers that still
// try to access that one member.
#[doc(hidden)]
#[deprecated(
since = "0.2.147",
note = "Use sigevent instead"
)]
pub struct sigevent_0_2_126 {
pub sigev_notify: ::c_int,
pub sigev_signo: ::c_int,
pub sigev_value: ::sigval,
pub sigev_notify_thread_id: ::lwpid_t,
#[cfg(target_pointer_width = "64")]
__unused1: ::c_int,
__unused2: [::c_long; 7]
}
}

#[cfg(libc_union)]
s_no_extra_traits! {
// Can't correctly impl Debug for unions
#[allow(missing_debug_implementations)]
pub union __c_anonymous_sigev_un {
pub _threadid: ::__lwpid_t,
pub _sigev_thread: __c_anonymous_sigev_thread,
pub _kevent_flags: ::c_ushort,
__spare__: [::c_long; 8],
}

pub struct sigevent {
pub sigev_notify: ::c_int,
pub sigev_signo: ::c_int,
pub sigev_value: ::sigval,
pub _sigev_un: __c_anonymous_sigev_un,
/// Exists just to prevent the struct from being safely constructed,
/// because the Debug, Hash, PartialImpl, and
/// Deref<Target=sigevent_0_2_126> trait impls might read uninitialized
/// fields of _sigev_un. This field may be removed once those trait
/// impls are.
_private: ()
}
}

#[cfg(not(libc_union))]
s_no_extra_traits! {
pub struct sigevent {
pub sigev_notify: ::c_int,
pub sigev_signo: ::c_int,
pub sigev_value: ::sigval,
pub _unused0: ::lwpid_t,
#[cfg(target_pointer_width = "64")]
__unused1: ::c_int,
__unused2: [::c_long; 7],
/// Exists just to prevent the struct from being safely constructed,
/// because the Debug, Hash, PartialImpl, and
/// Deref<Target=sigevent_0_2_126> trait impls might read uninitialized
/// fields of _sigev_un. This field may be removed once those trait
/// impls are.
_private: ()
}
}

#[allow(deprecated)]
#[cfg(libc_union)]
impl ::core::ops::Deref for sigevent {
type Target = sigevent_0_2_126;

fn deref(&self) -> &Self::Target {
unsafe { &*(self as *const Self as *const sigevent_0_2_126) }
Comment on lines +1701 to +1705
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This Deref impl exposes UB to safe code on freebsd.

fn main() {
    let sigevent = libc::sigevent {
        sigev_notify: 0,
        sigev_signo: 0,
        sigev_value: libc::sigval {
            sival_ptr: 0 as *mut _,
        },
        _sigev_un: libc::__c_anonymous_sigev_un {
            _kevent_flags: 0,
        },
    };
    let _val = sigevent.sigev_notify_thread_id;
}

Tested in miri using rust-lang/miri#2221:

$ cargo miri run --target x86_64-unknown-freebsd

error: Undefined Behavior: type validation failed: encountered uninitialized bytes, but expected initialized bytes
  --> src/main.rs:12:16
   |
12 |     let _val = sigevent.sigev_notify_thread_id;
   |                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ type validation failed: encountered uninitialized bytes, but expected initialized bytes
   |
   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
           
   = note: inside `main` at src/main.rs:12:16

@Amanieu I thought this exact unsoundness issue came up in a recent T-libs-api meeting. Did we end up deciding that this is "okay enough" as a tradeoff? I didn't see it discussed explicitly here on the PR.

Screenshot from 2022-06-20 21-58-56

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But that field will never be uninitialized, because no program should ever use both _sigev_un and sigev_notify_thread_id. The latter only exists to provide backwards-compatibility with the pre-union code. So anybody accessing that field should already have initialized the entire structure.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get that what you said is what programs should do. If the libc crate ensures that programs do as you described, then that's great. If the programmer needs to ensure they do as you described or else they get memory unsafety (which is what is currently implemented in this PR) then we need to make them write unsafe somewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that the old, non-union version of sigevent is already in the wild. And since it isn't unsafe, there simply isn't any way for libc to fix the definition of sigevent, be backwards-compatible, and be unsafe. The deref compromise suggested by @Amanieu is IMHO the best compromise.
FWIW there is exactly one currently-maintained crate on crates.io that uses the old sigev_notify_thread_id field, and I'm a maintainer of that crate, so I can fix any resulting bugs. It's my intention to release a new version of Nix as soon as libc releases a new version with this PR. However, the backwards-compatibility is still useful for users of older Nix versions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there simply isn't any way for libc to fix the definition of sigevent, be backwards-compatible, and be unsafe

Before this PR, libc::sigevent cannot be constructed in safe code because it has private padding fields. After this PR it can be constructed as shown above, which is a soundness bug. Preserving the property that it cannot be constructed in safe code is not a breaking change. The safety requirement on constructing a sigevent can be that you never expose it in a way that would mismatch the sigev_notify with the active variant of _sigev_un.

You can use #[non_exhaustive] if libc uses that these days, or just a zero sized private field with appropriate comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, we could do that. But it would also have the side effect of making sigevent not safely constructable, just to support the backwards-compatibility use-case. That seems like an unnecessary handicap going forward. Would you be satisfied if we added #[deprecated] to the sigevent_0_2_126 struct and its sigev_notify_thread_id field? That's probably something we ought to do anyway.

}
}

#[allow(deprecated)]
#[cfg(libc_union)]
impl ::core::ops::DerefMut for sigevent {
fn deref_mut(&mut self) -> &mut Self::Target {
unsafe { &mut *(self as *mut Self as *mut sigevent_0_2_126) }
}
}

cfg_if! {
if #[cfg(feature = "extra_traits")] {
impl PartialEq for utmpx {
Expand Down Expand Up @@ -1826,33 +1903,78 @@ cfg_if! {
}
}

#[cfg(libc_union)]
impl PartialEq for sigevent {
fn eq(&self, other: &sigevent) -> bool {
self.sigev_notify == other.sigev_notify
&& self.sigev_signo == other.sigev_signo
&& self.sigev_value == other.sigev_value
// sigev_notify indicates which union fields are valid
&& match self.sigev_notify {
::SIGEV_NONE => true,
::SIGEV_SIGNAL => true,
::SIGEV_THREAD => unsafe {
self._sigev_un._sigev_thread
== other._sigev_un._sigev_thread
},
::SIGEV_KEVENT => unsafe {
self._sigev_un._kevent_flags
== other._sigev_un._kevent_flags
},
::SIGEV_THREAD_ID => unsafe {
self._sigev_un._threadid
== other._sigev_un._threadid
},
Comment on lines +1916 to +1927
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All these unsafe blocks are reading and comparing potentially uninitialized data from the union.

fn main() {
    let sigevent = libc::sigevent {
        sigev_notify: libc::SIGEV_THREAD,
        sigev_signo: 0,
        sigev_value: libc::sigval {
            sival_ptr: 0 as *mut _,
        },
        _sigev_un: libc::__c_anonymous_sigev_un {
            _kevent_flags: 0,
        },
    };
    let _eq = sigevent == sigevent;
}
error: Undefined Behavior: type validation failed: encountered uninitialized raw pointer
    --> /git/libc/src/unix/bsd/freebsdlike/freebsd/mod.rs:233:1
     |
233  | / s! {
234  | |     pub struct aiocb {
235  | |         pub aio_fildes: ::c_int,
236  | |         pub aio_offset: ::off_t,
...    |
1013 | |     }
1014 | | }
     | |_^ type validation failed: encountered uninitialized raw pointer
     |
     = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
     = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
             
     = note: inside `<libc::__c_anonymous_sigev_thread as std::cmp::PartialEq>::eq` at /git/libc/src/unix/bsd/freebsdlike/freebsd/mod.rs:876:9
     = note: inside `<libc::sigevent as std::cmp::PartialEq>::eq` at /git/libc/src/unix/bsd/freebsdlike/freebsd/mod.rs:1445:29
note: inside `main` at src/main.rs:12:15
    --> src/main.rs:12:15
     |
12   |     let _eq = sigevent == sigevent;
     |               ^^^^^^^^^^^^^^^^^^^^
     = note: this error originates in the derive macro `PartialEq` (in Nightly builds, run with -Z macro-backtrace for more info)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. I think we should remove most trait impls for any struct that contains a union (see #2816). But how to do it without breaking existing code?

_ => false
}
}
}
#[cfg(not(libc_union))]
impl PartialEq for sigevent {
fn eq(&self, other: &sigevent) -> bool {
self.sigev_notify == other.sigev_notify
&& self.sigev_signo == other.sigev_signo
&& self.sigev_value == other.sigev_value
&& self.sigev_notify_thread_id
== other.sigev_notify_thread_id
}
}
impl Eq for sigevent {}
impl ::fmt::Debug for sigevent {
fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result {
f.debug_struct("sigevent")
.field("sigev_notify", &self.sigev_notify)
.field("sigev_signo", &self.sigev_signo)
.field("sigev_value", &self.sigev_value)
.field("sigev_notify_thread_id",
&self.sigev_notify_thread_id)
.finish()
let mut ds = f.debug_struct("sigevent");
ds.field("sigev_notify", &self.sigev_notify);
ds.field("sigev_signo", &self.sigev_signo);
ds.field("sigev_value", &self.sigev_value);
#[cfg(libc_union)]
// The sigev_notify field indicates which union fields are valid
unsafe {
match self.sigev_notify {
::SIGEV_THREAD => ds.field("_sigev_thread",
&self._sigev_un._sigev_thread),
::SIGEV_KEVENT => ds.field("_kevent_flags",
&self._sigev_un._kevent_flags),
::SIGEV_THREAD_ID => ds.field("_threadid",
&self._sigev_un._threadid),
Comment on lines +1951 to +1956
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

fn main() {
    let sigevent = libc::sigevent {
        sigev_notify: libc::SIGEV_THREAD,
        sigev_signo: 0,
        sigev_value: libc::sigval {
            sival_ptr: 0 as *mut _,
        },
        _sigev_un: libc::__c_anonymous_sigev_un {
            _kevent_flags: 0,
        },
    };
    println!("{:?}", sigevent);
}

_ => &mut ds
}
};
ds.finish()
}
}
impl ::hash::Hash for sigevent {
fn hash<H: ::hash::Hasher>(&self, state: &mut H) {
self.sigev_notify.hash(state);
self.sigev_signo.hash(state);
self.sigev_value.hash(state);
self.sigev_notify_thread_id.hash(state);
#[cfg(libc_union)]
// The sigev_notify field indicates which union fields are valid
unsafe {
match self.sigev_notify {
::SIGEV_THREAD => self._sigev_un._sigev_thread.hash(state),
::SIGEV_KEVENT => self._sigev_un._kevent_flags.hash(state),
::SIGEV_THREAD_ID => self._sigev_un._threadid.hash(state),
Comment on lines +1972 to +1974
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

I think either these impls need to be removed because sigev_notify is not trustworthy, or constructing sigevent with a user-controlled sigev_notify needs to be unsafe in some way, or constructing sigevent needs to be disallowed altogether.

_ => ()
};
}
}
}

Expand Down