io: rewrite slab to support compaction
The I/O driver uses a slab to store per-resource state. Doing this
provides two benefits. First, allocating state is streamlined. Second,
resources may be safely indexed using a `usize` type. The `usize` is
passed to the OS's selector when registering for receiving events.
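
As a rough sketch of that registration path (assuming the mio 0.6 API this
driver is built on; the listener and index below are made up for
illustration, not taken from this change):

    use mio::net::TcpListener;
    use mio::{Poll, PollOpt, Ready, Token};

    // A resource registers with the selector using the `usize` handed out
    // by the slab, wrapped in a `mio::Token`. The selector reports that
    // same value back with each readiness event.
    fn register_with_selector(
        poll: &Poll,
        listener: &TcpListener,
        slab_index: usize,
    ) -> std::io::Result<()> {
        poll.register(listener, Token(slab_index), Ready::readable(), PollOpt::edge())
    }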

The original slab implementation used a `Vec` backed by `RwLock`. This
primarily caused contention when reading state. This implementation also
only **grew** the slab capacity and never shrank it. In #1625, the slab was
rewritten to use a lock-free strategy. The lock contention was removed
but this implementation was still grow-only.

This change adds the ability to release memory. Similar to the previous
implementation, it structures the slab to use a vector of pages. This
enables growing the slab without having to move any previous entries. It
also adds the ability to release pages. This is done by introducing a
lock when allocating / releasing slab entries. The added lock does not
impact benchmarks, primarily because the existing implementation was not
fully "done" and already had a lock around allocating and releasing.

A `Slab::compact()` function is added. It iterates the slab's pages and
frees any page that has no slots in use. The `compact()` function is
called occasionally by the I/O driver.
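
To make the page/compaction scheme concrete, here is a minimal sketch of
the idea; the types and field names below are simplified stand-ins, not
the actual `tokio::util::slab` implementation:

    use std::sync::Mutex;

    struct Page<T> {
        slots: Vec<Option<T>>, // `None` means the slot is free
        used: usize,           // number of occupied slots on this page
    }

    struct Slab<T> {
        // Allocating and releasing entries takes this lock. Pages are never
        // moved once created, so live entries keep a stable address.
        pages: Mutex<Vec<Option<Page<T>>>>,
    }

    impl<T> Slab<T> {
        /// Frees every page that currently has no slots in use.
        fn compact(&self) {
            let mut pages = self.pages.lock().unwrap();
            for page in pages.iter_mut() {
                if page.as_ref().map_or(false, |p| p.used == 0) {
                    // Dropping the page releases its slot storage; the `None`
                    // placeholder keeps later pages at the same index.
                    *page = None;
                }
            }
        }
    }
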
carllerche committed Aug 10, 2020
1 parent d8490c1 commit a04ab1a
Showing 17 changed files with 881 additions and 1,229 deletions.
114 changes: 72 additions & 42 deletions tokio/src/io/driver/mod.rs
@@ -3,23 +3,30 @@ pub(crate) mod platform;
mod scheduled_io;
pub(crate) use scheduled_io::ScheduledIo; // pub(crate) for tests

use crate::loom::sync::atomic::AtomicUsize;
use crate::park::{Park, Unpark};
use crate::runtime::context;
use crate::util::slab::{Address, Slab};
use crate::util::bit;
use crate::util::slab::{self, Slab};

use mio::event::Evented;
use std::fmt;
use std::io;
use std::sync::atomic::Ordering::SeqCst;
use std::sync::{Arc, Weak};
use std::task::Waker;
use std::time::Duration;

/// I/O driver, backed by Mio
pub(crate) struct Driver {
/// Tracks the number of times `turn` is called. It is safe for this to wrap
/// as it is mostly used to determine when to call `compact()`
tick: u16,

/// Reuse the `mio::Events` value across calls to poll.
events: mio::Events,
events: Option<mio::Events>,

/// Primary slab handle containing the state for each resource registered
/// with this driver.
resources: Slab<ScheduledIo>,

/// State shared between the reactor and the handles.
inner: Arc<Inner>,
@@ -37,11 +44,8 @@ pub(super) struct Inner {
/// The underlying system event queue.
io: mio::Poll,

/// Dispatch slabs for I/O and futures events
pub(super) io_dispatch: Slab<ScheduledIo>,

/// The number of sources in `io_dispatch`.
n_sources: AtomicUsize,
/// Allocates `ScheduledIo` handles when creating new resources.
pub(super) io_dispatch: slab::Allocator<ScheduledIo>,

/// Used to wake up the reactor from a call to `turn`
wakeup: mio::SetReadiness,
@@ -53,7 +57,19 @@ pub(super) enum Direction {
Write,
}

const TOKEN_WAKEUP: mio::Token = mio::Token(Address::NULL);
// TODO: Don't use a fake token. Instead, reserve a slot entry for the wakeup
// token.
const TOKEN_WAKEUP: mio::Token = mio::Token(1 << 31);

const ADDRESS: bit::Pack = bit::Pack::least_significant(24);

// Packs the generation value in the `readiness` field.
//
// The generation prevents a race condition where a slab slot is reused for a
// new socket while the I/O driver is about to apply a readiness event. The
// generation value is checked when setting new readiness. If the generations
// do not match, the readiness event is discarded.
const GENERATION: bit::Pack = ADDRESS.then(7);
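// As a worked example of this packing (illustration only, not lines from the
// diff): `ADDRESS` covers bits 0..24 and `GENERATION` the next 7 bits, so a
// token for slot address 5 at generation 3 is
// `GENERATION.pack(3, ADDRESS.pack(5, 0)) == (3 << 24) | 5 == 0x0300_0005`,
// and unpacking the token recovers the two fields independently.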

fn _assert_kinds() {
fn _assert<T: Send + Sync>() {}
@@ -69,6 +85,8 @@ impl Driver {
pub(crate) fn new() -> io::Result<Driver> {
let io = mio::Poll::new()?;
let wakeup_pair = mio::Registration::new2();
let slab = Slab::new();
let allocator = slab.allocator();

io.register(
&wakeup_pair.0,
@@ -78,12 +96,13 @@ )?;
)?;

Ok(Driver {
events: mio::Events::with_capacity(1024),
tick: 0,
events: Some(mio::Events::with_capacity(1024)),
resources: slab,
_wakeup_registration: wakeup_pair.0,
inner: Arc::new(Inner {
io,
io_dispatch: Slab::new(),
n_sources: AtomicUsize::new(0),
io_dispatch: allocator,
wakeup: wakeup_pair.1,
}),
})
@@ -102,16 +121,27 @@ impl Driver {
}

fn turn(&mut self, max_wait: Option<Duration>) -> io::Result<()> {
// How often to call `compact()` on the resource slab
const COMPACT_INTERVAL: u16 = 256;

self.tick = self.tick.wrapping_add(1);

if self.tick % COMPACT_INTERVAL == 0 {
self.resources.compact();
}

let mut events = self.events.take().expect("i/o driver event store missing");

// Block waiting for an event to happen, peeling out how many events
// happened.
match self.inner.io.poll(&mut self.events, max_wait) {
match self.inner.io.poll(&mut events, max_wait) {
Ok(_) => {}
Err(e) => return Err(e),
}

// Process all the events that came in, dispatching appropriately

for event in self.events.iter() {
for event in events.iter() {
let token = event.token();

if token == TOKEN_WAKEUP {
@@ -124,22 +154,24 @@ }
}
}

self.events = Some(events);

Ok(())
}

fn dispatch(&self, token: mio::Token, ready: mio::Ready) {
fn dispatch(&mut self, token: mio::Token, ready: mio::Ready) {
let mut rd = None;
let mut wr = None;

let address = Address::from_usize(token.0);
let addr = slab::Address::from_usize(ADDRESS.unpack(token.0));

let io = match self.inner.io_dispatch.get(address) {
let io = match self.resources.get(addr) {
Some(io) => io,
None => return,
};

if io
.set_readiness(address, |curr| curr | ready.as_usize())
.set_readiness(Some(token.0), |curr| curr | ready.as_usize())
.is_err()
{
// token no longer valid!
@@ -164,6 +196,18 @@ }
}
}

impl Drop for Driver {
fn drop(&mut self) {
self.resources.for_each(|io| {
// If a task is waiting on the I/O resource, notify it. The task
// will then attempt to use the I/O resource and fail due to the
// driver being shutdown.
io.reader.wake();
io.writer.wake();
})
}
}

impl Park for Driver {
type Unpark = Handle;
type Error = io::Error;
@@ -246,46 +290,32 @@ impl Inner {
&self,
source: &dyn Evented,
ready: mio::Ready,
) -> io::Result<Address> {
let address = self.io_dispatch.alloc().ok_or_else(|| {
) -> io::Result<slab::Ref<ScheduledIo>> {
let (address, shared) = self.io_dispatch.allocate().ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
"reactor at max registered I/O resources",
)
})?;

self.n_sources.fetch_add(1, SeqCst);
let token = GENERATION.pack(shared.generation(), ADDRESS.pack(address.as_usize(), 0));

self.io.register(
source,
mio::Token(address.to_usize()),
ready,
mio::PollOpt::edge(),
)?;
self.io
.register(source, mio::Token(token), ready, mio::PollOpt::edge())?;

Ok(address)
Ok(shared)
}

/// Deregisters an I/O resource from the reactor.
pub(super) fn deregister_source(&self, source: &dyn Evented) -> io::Result<()> {
self.io.deregister(source)
}

pub(super) fn drop_source(&self, address: Address) {
self.io_dispatch.remove(address);
self.n_sources.fetch_sub(1, SeqCst);
}

/// Registers interest in the I/O resource associated with `token`.
pub(super) fn register(&self, token: Address, dir: Direction, w: Waker) {
let sched = self
.io_dispatch
.get(token)
.unwrap_or_else(|| panic!("IO resource for token {:?} does not exist!", token));

pub(super) fn register(&self, io: &slab::Ref<ScheduledIo>, dir: Direction, w: Waker) {
let waker = match dir {
Direction::Read => &sched.reader,
Direction::Write => &sched.writer,
Direction::Read => &io.reader,
Direction::Write => &io.writer,
};

waker.register(w);
72 changes: 29 additions & 43 deletions tokio/src/io/driver/scheduled_io.rs
@@ -1,47 +1,30 @@
use crate::loom::future::AtomicWaker;
use crate::loom::sync::atomic::AtomicUsize;
use crate::util::bit;
use crate::util::slab::{Address, Entry, Generation};
use crate::util::slab::Entry;

use std::sync::atomic::Ordering::{AcqRel, Acquire, SeqCst};
use std::sync::atomic::Ordering::{AcqRel, Acquire, Release};

/// Stored in the I/O driver resource slab.
#[derive(Debug)]
pub(crate) struct ScheduledIo {
/// Packs the resource's readiness with the resource's generation.
readiness: AtomicUsize,

/// Task waiting on read readiness
pub(crate) reader: AtomicWaker,

/// Task waiting on write readiness
pub(crate) writer: AtomicWaker,
}

const PACK: bit::Pack = bit::Pack::most_significant(Generation::WIDTH);

impl Entry for ScheduledIo {
fn generation(&self) -> Generation {
unpack_generation(self.readiness.load(SeqCst))
}

fn reset(&self, generation: Generation) -> bool {
let mut current = self.readiness.load(Acquire);

loop {
if unpack_generation(current) != generation {
return false;
}

let next = PACK.pack(generation.next().to_usize(), 0);
fn reset(&self) {
let state = self.readiness.load(Acquire);

match self
.readiness
.compare_exchange(current, next, AcqRel, Acquire)
{
Ok(_) => break,
Err(actual) => current = actual,
}
}
let generation = super::GENERATION.unpack(state);
let next = super::GENERATION.pack_lossy(generation + 1, 0);

drop(self.reader.take_waker());
drop(self.writer.take_waker());

true
self.readiness.store(next, Release);
}
}

@@ -56,6 +39,10 @@ impl Default for ScheduledIo {
}

impl ScheduledIo {
pub(crate) fn generation(&self) -> usize {
super::GENERATION.unpack(self.readiness.load(Acquire))
}

#[cfg(all(test, loom))]
/// Returns the current readiness value of this `ScheduledIo`, if the
/// provided `token` is still a valid access.
@@ -92,32 +79,35 @@ impl ScheduledIo {
/// Otherwise, this returns the previous readiness.
pub(crate) fn set_readiness(
&self,
address: Address,
token: Option<usize>,
f: impl Fn(usize) -> usize,
) -> Result<usize, ()> {
let generation = address.generation();

let mut current = self.readiness.load(Acquire);

loop {
// Check that the generation for this access is still the current
// one.
if unpack_generation(current) != generation {
return Err(());
let current_generation = super::GENERATION.unpack(current);

if let Some(token) = token {
// Check that the generation for this access is still the
// current one.
if super::GENERATION.unpack(token) != current_generation {
return Err(());
}
}

// Mask out the generation bits so that the modifying function
// doesn't see them.
let current_readiness = current & mio::Ready::all().as_usize();
let new = f(current_readiness);

debug_assert!(
new <= !PACK.max_value(),
new <= super::ADDRESS.max_value(),
"new readiness value would overwrite generation bits!"
);

match self.readiness.compare_exchange(
current,
PACK.pack(generation.to_usize(), new),
super::GENERATION.pack(current_generation, new),
AcqRel,
Acquire,
) {
@@ -135,7 +125,3 @@ impl Drop for ScheduledIo {
self.reader.wake();
}
}

fn unpack_generation(src: usize) -> Generation {
Generation::new(PACK.unpack(src))
}
