Skip to content

Commit

Permalink
Use MaybeUninit while manipulating uninitialized data
Browse files Browse the repository at this point in the history
Because there's no way to manipulate unitialized data to some data types
just through references.

Adding Arena::reserve_extend to allow a somewhat safer pattern how to
initialize the memory without risk of causing UB by panicking before
full initialization, causing the arena to call destructors on bad data.

Closes #38.
  • Loading branch information
vorner committed Nov 26, 2019
1 parent 3e8abe0 commit 0e21154
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 24 deletions.
126 changes: 105 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ use core::iter;
use core::mem;
use core::slice;

use mem::MaybeUninit;

#[cfg(test)]
mod test;

Expand Down Expand Up @@ -273,24 +275,77 @@ impl<T> Arena<T> {

/// Allocates space for a given number of values, but doesn't initialize it.
///
/// ## Unsafety and Undefined Behavior
/// ## Safety
///
/// After calling this method, the arena considers the elements initialized. If you fail to
/// initialize them (which includes because of panicking during the initialization), the arena
/// will run destructors on the uninitialized memory. Therefore, you must initialize them.
///
/// Considering how easy it is to cause undefined behaviour using this, you're advised to
/// prefer the other (safe) methods, like [`alloc_extend`].
///
/// ## Example
///
/// ```rust
/// use std::mem::{self, MaybeUninit};
/// use std::ptr;
/// use typed_arena::Arena;
///
/// // Transmute from MaybeUninit slice to slice of initialized T.
/// // It is a separate function to preserve the lifetime of the reference.
/// unsafe fn transmute_uninit<A>(r: &mut [MaybeUninit<A>]) -> &mut [A] {
/// mem::transmute(r)
/// }
///
/// let arena: Arena<bool> = Arena::new();
/// let slice: &mut [bool];
/// unsafe {
/// let uninitialized = arena.alloc_uninitialized(10);
/// for elem in uninitialized.iter_mut() {
/// ptr::write(elem.as_mut_ptr(), true);
/// }
/// slice = transmute_uninit(uninitialized);
/// }
/// ```
///
/// The same caveats that apply to
/// [`std::mem::uninitialized`](https://doc.rust-lang.org/nightly/std/mem/fn.uninitialized.html)
/// apply here:
/// ## Alternative allocation pattern
///
/// > **This is incredibly dangerous and should not be done lightly. Deeply
/// consider initializing your memory with a default value instead.**
/// To avoid the problem of dropping assumed to be initialized elements on panic, it is also
/// possible to combine the [`reserve_extend`] with [`uninitialized_array`], initialize the
/// elements and confirm them by this method. In such case, when there's a panic during
/// initialization, the already initialized elements would leak but it wouldn't cause UB.
///
/// ```rust
/// use std::mem::{self, MaybeUninit};
/// use std::ptr;
/// use typed_arena::Arena;
///
/// In particular, it is easy to trigger undefined behavior by allocating
/// uninitialized values, failing to properly initialize them, and then the
/// `Arena` will attempt to drop them when it is dropped. Initializing an
/// uninitialized value is trickier than it might seem: a normal assignment
/// to a field will attempt to drop the old, uninitialized value, which
/// almost certainly also triggers undefined behavior. You must also
/// consider all the places where your code might "unexpectedly" drop values
/// earlier than it "should" because of unwinding during panics.
pub unsafe fn alloc_uninitialized(&self, num: usize) -> *mut [T] {
/// unsafe fn transmute_uninit<A>(r: &mut [MaybeUninit<A>]) -> &mut [A] {
/// mem::transmute(r)
/// }
///
/// const COUNT: usize = 2;
///
/// let arena: Arena<String> = Arena::new();
///
/// arena.reserve_extend(COUNT);
/// let slice: &mut [String];
/// unsafe {
/// // Perform initialization before we claim the memory.
/// let uninitialized = arena.uninitialized_array();
/// assert!((*uninitialized).len() >= COUNT); // Ensured by the reserve_extend
/// for elem in &mut (*uninitialized)[..COUNT] {
/// ptr::write(elem.as_mut_ptr(), "Hello".to_owned());
/// }
/// let addr = (*uninitialized).as_ptr() as usize;
///
/// // The alloc_uninitialized returns the same memory, but "confirms" its allocation.
/// slice = transmute_uninit(arena.alloc_uninitialized(COUNT));
/// assert_eq!(addr, slice.as_ptr() as usize);
/// assert_eq!(slice, &["Hello".to_owned(), "Hello".to_owned()]);
/// }
/// ```
pub unsafe fn alloc_uninitialized(&self, num: usize) -> &mut [MaybeUninit<T>] {
let mut chunks = self.chunks.borrow_mut();

debug_assert!(
Expand All @@ -304,8 +359,29 @@ impl<T> Arena<T> {
// At this point, the current chunk must have free capacity.
let next_item_index = chunks.current.len();
chunks.current.set_len(next_item_index + num);
// Extend the lifetime...
&mut chunks.current[next_item_index..] as *mut _

// Go through pointers, to make sure we never create a reference to uninitialized T.
let start = chunks.current.as_mut_ptr().offset(next_item_index as isize);
let start_uninit = start as *mut MaybeUninit<T>;
slice::from_raw_parts_mut(start_uninit, num)
}

/// Makes sure there's enough continuous space for at least `num` elements.
///
/// This may save some work if called before [`alloc_extend`]. It also allows somewhat safer
/// use pattern of [`alloc_uninitialized`]. On the other hand this might waste up to `n - 1`
/// elements of space. In case new allocation is needed, the unused ones in current chunk are
/// never used.
pub fn reserve_extend(&self, num: usize) {
let mut chunks = self.chunks.borrow_mut();

debug_assert!(
chunks.current.capacity() >= chunks.current.len(),
"capacity is always greater than or equal to len, so we don't need to worry about underflow"
);
if num > chunks.current.capacity() - chunks.current.len() {
chunks.reserve(num);
}
}

/// Returns unused space.
Expand All @@ -314,12 +390,20 @@ impl<T> Arena<T> {
/// won't be dropped unless there are further calls to `alloc`,
/// `alloc_uninitialized`, or `alloc_extend` which is why the method is
/// safe.
pub fn uninitialized_array(&self) -> *mut [T] {
let chunks = self.chunks.borrow();
///
/// It returns a raw pointer to avoid creating multiple mutable references to the same place.
/// It is up to the caller not to dereference it after any of the `alloc_` methods are called.
pub fn uninitialized_array(&self) -> *mut [MaybeUninit<T>] {
let mut chunks = self.chunks.borrow_mut();
let len = chunks.current.capacity() - chunks.current.len();
let next_item_index = chunks.current.len();
let slice = &chunks.current[next_item_index..];
unsafe { slice::from_raw_parts_mut(slice.as_ptr() as *mut T, len) as *mut _ }

unsafe {
// Go through pointers, to make sure we never create a reference to uninitialized T.
let start = chunks.current.as_mut_ptr().offset(next_item_index as isize);
let start_uninit = start as *mut MaybeUninit<T>;
slice::from_raw_parts_mut(start_uninit, len) as *mut _
}
}

/// Convert this `Arena` into a `Vec<T>`.
Expand Down
55 changes: 52 additions & 3 deletions src/test.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::*;
use std::cell::Cell;
use std::mem;
use std::panic::{self, AssertUnwindSafe};
use std::ptr;

struct DropTracker<'a>(&'a Cell<u32>);
Expand Down Expand Up @@ -101,8 +102,8 @@ fn test_alloc_uninitialized() {
let arena: Arena<Node> = Arena::with_capacity(4);
for i in 0..LIMIT {
let slice = arena.alloc_uninitialized(i);
for (j, elem) in (&mut *slice).iter_mut().enumerate() {
ptr::write(elem, Node(None, j as u32, DropTracker(&drop_counter)));
for (j, elem) in slice.iter_mut().enumerate() {
ptr::write(elem.as_mut_ptr(), Node(None, j as u32, DropTracker(&drop_counter)));
}
assert_eq!(drop_counter.get(), 0);
}
Expand All @@ -125,14 +126,62 @@ fn test_alloc_extend_with_drop_counter() {
assert_eq!(drop_counter.get(), 200);
}

/// Test with bools.
///
/// Bools, unlike integers, have invalid bit patterns. Therefore, ever having an uninitialized bool
/// is insta-UB. Make sure miri doesn't find any such thing.
#[test]
fn test_alloc_uninitialized_bools() {
const LEN: usize = 20;
unsafe {
let arena: Arena<bool> = Arena::with_capacity(2);
let slice = arena.alloc_uninitialized(LEN);
for elem in slice.iter_mut() {
ptr::write(elem.as_mut_ptr(), true);
}
// Now it is fully initialized, we can safely transmute the slice.
let slice: &mut [bool] = mem::transmute(slice);
assert_eq!(&[true; LEN], slice);
}
}

/// Check nothing bad happens by panicking during initialization of borrowed slice.
#[test]
fn alloc_uninitialized_with_panic() {
struct Dropper(bool);

impl Drop for Dropper {
fn drop(&mut self) {
// Just make sure we touch the value, to make sure miri would bite if it was
// unitialized
if self.0 {
panic!();
}
}
}
let mut reached_first_init = false;
panic::catch_unwind(AssertUnwindSafe(|| unsafe {
let arena: Arena<Dropper> = Arena::new();
arena.reserve_extend(2);
let uninitialized = arena.uninitialized_array();
assert!((*uninitialized).len() >= 2);
ptr::write((*uninitialized)[0].as_mut_ptr(), Dropper(false));
reached_first_init = true;
panic!("To drop the arena");
// If it didn't panic, we would continue by initializing the second one and confirming by
// .alloc_uninitialized();
})).unwrap_err();
assert!(reached_first_init);
}

#[test]
fn test_uninitialized_array() {
let arena = Arena::with_capacity(2);
let uninit = arena.uninitialized_array();
arena.alloc_extend(0..2);
unsafe {
for (&a, b) in (&*uninit).iter().zip(0..2) {
assert_eq!(a, b);
assert_eq!(a.assume_init(), b);
}
assert!((&*arena.uninitialized_array()).as_ptr() != (&*uninit).as_ptr());
arena.alloc(0);
Expand Down

0 comments on commit 0e21154

Please sign in to comment.