Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for keeping pooling allocator pages resident #5207

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 10 additions & 1 deletion crates/fuzzing/src/generators/pooling_config.rs
Expand Up @@ -14,6 +14,9 @@ pub struct PoolingAllocationConfig {
pub instance_table_elements: u32,
pub instance_size: usize,
pub async_stack_zeroing: bool,
pub async_stack_keep_resident: usize,
pub linear_memory_keep_resident: usize,
pub table_keep_resident: usize,
}

impl PoolingAllocationConfig {
Expand All @@ -28,7 +31,10 @@ impl PoolingAllocationConfig {
.instance_memory_pages(self.instance_memory_pages)
.instance_table_elements(self.instance_table_elements)
.instance_size(self.instance_size)
.async_stack_zeroing(self.async_stack_zeroing);
.async_stack_zeroing(self.async_stack_zeroing)
.async_stack_keep_resident(self.async_stack_keep_resident)
.linear_memory_keep_resident(self.linear_memory_keep_resident)
.table_keep_resident(self.table_keep_resident);
cfg
}
}
Expand All @@ -51,6 +57,9 @@ impl<'a> Arbitrary<'a> for PoolingAllocationConfig {
instance_count: u.int_in_range(1..=MAX_COUNT)?,
instance_size: u.int_in_range(0..=MAX_SIZE)?,
async_stack_zeroing: u.arbitrary()?,
async_stack_keep_resident: u.int_in_range(0..=1 << 20)?,
linear_memory_keep_resident: u.int_in_range(0..=1 << 20)?,
table_keep_resident: u.int_in_range(0..=1 << 20)?,
})
}
}
Expand Down
225 changes: 190 additions & 35 deletions crates/runtime/src/cow.rs
Expand Up @@ -466,39 +466,23 @@ impl MemoryImageSlot {
Ok(())
}

/// Resets this linear memory slot back to a "pristine state".
///
/// This will reset the memory back to its original contents on Linux or
/// reset the contents back to zero on other platforms. The `keep_resident`
/// argument is the maximum amount of memory to keep resident in this
/// process's memory on Linux. Up to that much memory will be `memset` to
/// zero where the rest of it will be reset or released with `madvise`.
#[allow(dead_code)] // ignore warnings as this is only used in some cfgs
pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
pub(crate) fn clear_and_remain_ready(&mut self, keep_resident: usize) -> Result<()> {
assert!(self.dirty);

cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
// On Linux we can use `madvise` to reset the virtual memory
// back to its original state. This means back to all zeros for
// anonymous-backed pages and back to the original contents for
// CoW memory (the initial heap image). This has the precise
// semantics we want for reuse between instances, so it's all we
// need to do.
unsafe {
rustix::mm::madvise(
self.base as *mut c_void,
self.cur_size,
rustix::mm::Advice::LinuxDontNeed,
)?;
}
} else {
// If we're not on Linux, however, then there's no generic
// platform way to reset memory back to its original state, so
// instead this is "feigned" by resetting memory back to
// entirely zeros with an anonymous backing.
//
// Additionally the previous image, if any, is dropped here
// since it's no longer applicable to this mapping.
self.reset_with_anon_memory()?;
self.image = None;
}
unsafe {
self.reset_all_memory_contents(keep_resident)?;
}

// mprotect the initial heap region beyond the initial heap size back to PROT_NONE.
// mprotect the initial heap region beyond the initial heap size back to
// PROT_NONE.
self.set_protection(
self.initial_size..self.cur_size,
rustix::mm::MprotectFlags::empty(),
Expand All @@ -508,6 +492,136 @@ impl MemoryImageSlot {
Ok(())
}

    /// Resets the entire contents of this linear memory slot back to its
    /// initial state: the original heap image contents on Linux, or all
    /// zeroes (with a fresh anonymous backing) on other platforms.
    ///
    /// Up to `keep_resident` bytes of non-image memory are manually `memset`
    /// to zero; the remainder — including the region covered by the CoW
    /// image, whose original contents `madvise` restores for free — is reset
    /// via `madvise_reset` instead.
    ///
    /// # Safety
    ///
    /// The caller must ensure this slot's memory mapping is currently
    /// writable (i.e. the slot is in its instantiated, dirty state).
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_all_memory_contents(&mut self, keep_resident: usize) -> Result<()> {
        if !cfg!(target_os = "linux") {
            // If we're not on Linux then there's no generic platform way to
            // reset memory back to its original state, so instead reset memory
            // back to entirely zeros with an anonymous backing.
            //
            // Additionally the previous image, if any, is dropped here
            // since it's no longer applicable to this mapping.
            return self.reset_with_anon_memory();
        }

        match &self.image {
            Some(image) => {
                assert!(self.cur_size >= image.linear_memory_offset + image.len);
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` then
                    // memory looks something like this:
                    //
                    //               up to `keep_resident` bytes
                    //                          |
                    //          +--------------------------+  remaining_memset
                    //          |                          | /
                    //          <-------------->    <------->
                    //
                    //                             image_end
                    // 0        linear_memory_offset   |             cur_size
                    // |                |               |                  |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+--------------+---------+--------+
                    //
                    //  <------+-------> <-----+-----> <---+---> <--+--->
                    //         |               |           |        |
                    //         |               |           |        |
                    //   memset (1)            /           |   madvise (4)
                    //            madvise (2) /
                    //                       /
                    //                  memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that are
                    // `memset` manually to zero. Note that `memset (3)` may be
                    // zero bytes large. Furthermore `madvise (4)` may also be
                    // zero bytes large.

                    let image_end = image.linear_memory_offset + image.len;
                    let mem_after_image = self.cur_size - image_end;
                    let remaining_memset =
                        (keep_resident - image.linear_memory_offset).min(mem_after_image);

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, image.linear_memory_offset);

                    // This is madvise (2)
                    self.madvise_reset(image.linear_memory_offset, image.len)?;

                    // This is memset (3)
                    std::ptr::write_bytes(
                        (self.base + image_end) as *mut u8,
                        0u8,
                        remaining_memset,
                    );

                    // This is madvise (4)
                    self.madvise_reset(
                        image_end + remaining_memset,
                        mem_after_image - remaining_memset,
                    )?;
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0             keep_resident                   cur_size
                    // |                |                                 |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory      |  image   |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    //  <------+-------> <-------------+----------------->
                    //         |                       |
                    //         |                       |
                    //   memset (1)                 madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping resident.
                    // Note that the memset may be zero bytes here.

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, keep_resident);

                    // This is madvise (2)
                    self.madvise_reset(keep_resident, self.cur_size - keep_resident)?;
                }
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.cur_size);
                std::ptr::write_bytes(self.base as *mut u8, 0u8, size_to_memset);
                self.madvise_reset(size_to_memset, self.cur_size - size_to_memset)?;
            }
        }

        Ok(())
    }

#[allow(dead_code)] // ignore warnings as this is only used in some cfgs
unsafe fn madvise_reset(&self, base: usize, len: usize) -> Result<()> {
assert!(base + len <= self.cur_size);
if len == 0 {
return Ok(());
}
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
rustix::mm::madvise(
(self.base + base) as *mut c_void,
len,
rustix::mm::Advice::LinuxDontNeed,
)?;
Ok(())
} else {
unreachable!();
}
}
}

fn set_protection(&self, range: Range<usize>, flags: rustix::mm::MprotectFlags) -> Result<()> {
assert!(range.start <= range.end);
assert!(range.end <= self.static_size);
Expand All @@ -532,7 +646,7 @@ impl MemoryImageSlot {

/// Map anonymous zeroed memory across the whole slot,
/// inaccessible. Used both during instantiate and during drop.
fn reset_with_anon_memory(&self) -> Result<()> {
fn reset_with_anon_memory(&mut self) -> Result<()> {
unsafe {
let ptr = rustix::mm::mmap_anonymous(
self.base as *mut c_void,
Expand All @@ -542,6 +656,11 @@ impl MemoryImageSlot {
)?;
assert_eq!(ptr as usize, self.base);
}

self.image = None;
self.cur_size = 0;
self.initial_size = 0;

Ok(())
}
}
Expand Down Expand Up @@ -638,7 +757,7 @@ mod test {
assert_eq!(0, slice[131071]);
// instantiate again; we should see zeroes, even as the
// reuse-anon-mmap-opt kicks in
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
assert!(!memfd.is_dirty());
memfd.instantiate(64 << 10, None).unwrap();
let slice = mmap.as_slice();
Expand All @@ -661,33 +780,69 @@ mod test {
assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
slice[4096] = 5;
// Clear and re-instantiate same image
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
memfd.instantiate(64 << 10, Some(&image)).unwrap();
let slice = mmap.as_slice();
// Should not see mutation from above
assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
// Clear and re-instantiate no image
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
memfd.instantiate(64 << 10, None).unwrap();
assert!(!memfd.has_image());
let slice = mmap.as_slice();
assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
// Clear and re-instantiate image again
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
memfd.instantiate(64 << 10, Some(&image)).unwrap();
let slice = mmap.as_slice();
assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
// Create another image with different data.
let image2 = Arc::new(create_memfd_with_data(4096, &[10, 11, 12, 13]).unwrap());
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
memfd.instantiate(128 << 10, Some(&image2)).unwrap();
let slice = mmap.as_slice();
assert_eq!(&[10, 11, 12, 13], &slice[4096..4100]);
// Instantiate the original image again; we should notice it's
// a different image and not reuse the mappings.
memfd.clear_and_remain_ready().unwrap();
memfd.clear_and_remain_ready(0).unwrap();
memfd.instantiate(64 << 10, Some(&image)).unwrap();
let slice = mmap.as_slice();
assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
}

#[test]
#[cfg(target_os = "linux")]
fn memset_instead_of_madvise() {
let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
memfd.no_clear_on_drop();

// Test basics with the image
for image_off in [0, 4096, 8 << 10] {
let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
memfd.instantiate(64 << 10, Some(&image)).unwrap();
assert!(memfd.has_image());
let slice = mmap.as_mut_slice();
if image_off > 0 {
assert_eq!(slice[image_off - 1], 0);
}
assert_eq!(slice[image_off + 5], 0);
assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
slice[image_off] = 5;
assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
memfd.clear_and_remain_ready(amt_to_memset).unwrap();
}
}

// Test without an image
for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
memfd.instantiate(64 << 10, None).unwrap();
for chunk in mmap.as_mut_slice()[..64 << 10].chunks_mut(1024) {
assert_eq!(chunk[0], 0);
chunk[0] = 5;
}
memfd.clear_and_remain_ready(amt_to_memset).unwrap();
}
}
}
2 changes: 1 addition & 1 deletion crates/runtime/src/cow_disabled.rs
Expand Up @@ -57,7 +57,7 @@ impl MemoryImageSlot {
unreachable!();
}

pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
pub(crate) fn clear_and_remain_ready(&mut self, _keep_resident: usize) -> Result<()> {
unreachable!();
}

Expand Down