Unsafe removal #1488

Open · wants to merge 7 commits into main
2 changes: 2 additions & 0 deletions benchmarks/criterion/Cargo.toml
@@ -14,4 +14,6 @@ harness = false
[dependencies]
criterion = "0.3.0"
sled = { path = "../.." }

[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
jemallocator = "0.3.2"
12 changes: 4 additions & 8 deletions benchmarks/criterion/benches/sled.rs
@@ -1,15 +1,11 @@
use criterion::{criterion_group, criterion_main, Criterion};

use jemallocator::Jemalloc;

use sled::Config;

#[cfg_attr(
// only enable jemalloc on linux and macos by default
any(target_os = "linux", target_os = "macos"),
global_allocator
)]
static ALLOC: Jemalloc = Jemalloc;
// only enable jemalloc on linux and macos by default
#[cfg(any(target_os = "linux", target_os = "macos"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

fn counter() -> usize {
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
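The allocator hunk fixes a subtle problem: `#[cfg_attr(pred, global_allocator)]` gates only the attribute, so `static ALLOC: Jemalloc` and its `use jemallocator::Jemalloc` import still had to compile on every target, even though the Cargo.toml hunk above declares jemallocator only for linux and macos. Gating the whole item with `#[cfg]` makes both the attribute and the static disappear where the dependency does not exist. A minimal self-contained sketch of the resulting pattern (not sled's exact bench file):

```rust
// Gate both the attribute and the static: on linux/macos, jemalloc
// becomes the global allocator; on every other target the system
// allocator is used and the jemallocator crate is never compiled.
#[cfg(any(target_os = "linux", target_os = "macos"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

fn main() {
    // Heap allocations below go through whichever allocator is active.
    let v: Vec<u64> = (0..1024).collect();
    assert_eq!(v.len(), 1024);
}
```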
56 changes: 0 additions & 56 deletions src/fastcmp.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/lib.rs
@@ -192,7 +192,6 @@ mod context;
mod db;
mod dll;
mod ebr;
mod fastcmp;
mod fastlock;
mod fnv;
mod histogram;
@@ -302,7 +301,6 @@ use {
pin as crossbeam_pin, Atomic, Guard as CrossbeamGuard, Owned,
Shared,
},
fastcmp::fastcmp,
lru::Lru,
meta::Meta,
node::Node,
5 changes: 2 additions & 3 deletions src/lru.rs
@@ -4,7 +4,6 @@ use std::{
borrow::{Borrow, BorrowMut},
convert::TryFrom,
hash::{Hash, Hasher},
mem::MaybeUninit,
sync::atomic::{AtomicPtr, AtomicUsize, Ordering},
};

@@ -38,7 +37,7 @@ impl Default for AccessBlock {
fn default() -> AccessBlock {
AccessBlock {
len: AtomicUsize::new(0),
block: unsafe { MaybeUninit::zeroed().assume_init() },
block: [(); MAX_QUEUE_ITEMS].map(|_| AtomicU64::default()),
next: AtomicPtr::default(),
}
}
@@ -53,7 +52,7 @@ impl AccessBlock {
fn new(item: CacheAccess) -> AccessBlock {
let mut ret = AccessBlock {
len: AtomicUsize::new(1),
block: unsafe { MaybeUninit::zeroed().assume_init() },
block: [(); MAX_QUEUE_ITEMS].map(|_| AtomicU64::default()),
next: AtomicPtr::default(),
};
ret.block[0] = AtomicU64::from(u64::from(item));
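In both `AccessBlock` constructors above, the zeroed `[AtomicU64; MAX_QUEUE_ITEMS]` array was previously conjured with `MaybeUninit::zeroed().assume_init()`. The replacement builds it element-wise with `array::map` (stable since Rust 1.55), so no uninitialized value is ever exposed. A self-contained sketch of the same pattern, with a hypothetical `MAX_QUEUE_ITEMS` value for illustration:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// hypothetical size, standing in for sled's MAX_QUEUE_ITEMS constant
const MAX_QUEUE_ITEMS: usize = 8;

// Build the array element-wise: one fresh zero-valued AtomicU64 per
// slot. Nothing uninitialized is ever observed, so no `unsafe` needed.
fn zeroed_block() -> [AtomicU64; MAX_QUEUE_ITEMS] {
    [(); MAX_QUEUE_ITEMS].map(|_| AtomicU64::default())
}

fn main() {
    let block = zeroed_block();
    assert!(block.iter().all(|slot| slot.load(Ordering::Relaxed) == 0));
}
```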
133 changes: 61 additions & 72 deletions src/node.rs
@@ -41,7 +41,8 @@ fn uninitialized_node(len: usize) -> Inner {
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct Header {
// NB always lay out fields from largest to smallest to properly pack the struct
// NB always lay out fields from largest to smallest to properly pack the
// struct
pub next: Option<NonZeroU64>,
pub merging_child: Option<NonZeroU64>,
lo_len: u64,
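The NB comment holds because `#[repr(C)]` lays fields out strictly in declaration order, so declaring them largest-first avoids padding holes between fields. A toy illustration of the effect (struct names hypothetical, not from sled):

```rust
use std::mem::size_of;

#[repr(C)]
struct Packed {
    a: u64, // offset 0
    b: u32, // offset 8
    c: u16, // offset 12
    d: u8,  // offset 14, one trailing pad byte to reach align 8
}

#[repr(C)]
struct Sloppy {
    d: u8,  // offset 0, then 7 pad bytes before the u64
    a: u64, // offset 8
    c: u16, // offset 16, then 2 pad bytes before the u32
    b: u32, // offset 20
}

fn main() {
    assert_eq!(size_of::<Packed>(), 16);
    assert_eq!(size_of::<Sloppy>(), 24);
}
```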
@@ -96,16 +97,12 @@ fn apply_computed_distance(mut buf: &mut [u8], mut distance: usize) {

// TODO change to u64 or u128 output
// This function has several responsibilities:
// * `find` will call this when looking for the
// proper child pid on an index, with slice
// lengths that may or may not match
// * `KeyRef::Ord` and `KeyRef::distance` call
// this while performing node iteration,
// again with possibly mismatching slice
// lengths. Merging nodes together, or
// merging overlays into inner nodes
// will rely on this functionality, and
// it's possible for the lengths to vary.
// * `find` will call this when looking for the proper child pid on an index,
// with slice lengths that may or may not match
// * `KeyRef::Ord` and `KeyRef::distance` call this while performing node
// iteration, again with possibly mismatching slice lengths. Merging nodes
// together, or merging overlays into inner nodes will rely on this
// functionality, and it's possible for the lengths to vary.
//
// This is not a general-purpose function. It
// is not possible to determine distances when
@@ -412,9 +409,10 @@ impl<'a> Iterator for Iter<'a> {
(Some((_, Some(_))), None) => {
log::trace!("src/node.rs:114");
log::trace!("iterator returning {:?}", self.next_a);
return self.next_a.take().map(|(k, v)| {
(KeyRef::Slice(k), v.unwrap().as_ref())
});
return self
.next_a
.take()
.map(|(k, v)| (KeyRef::Slice(k), v.unwrap().as_ref()));
}
(Some((k_a, v_a_opt)), Some((k_b, _))) => {
let cmp = KeyRef::Slice(k_a).cmp(&k_b);
@@ -511,9 +509,10 @@ impl<'a> DoubleEndedIterator for Iter<'a> {
(Some((_, Some(_))), None) => {
log::trace!("src/node.rs:483");
log::trace!("iterator returning {:?}", self.next_back_a);
return self.next_back_a.take().map(|(k, v)| {
(KeyRef::Slice(k), v.unwrap().as_ref())
});
return self
.next_back_a
.take()
.map(|(k, v)| (KeyRef::Slice(k), v.unwrap().as_ref()));
}
(Some((k_a, Some(_))), Some((k_b, _))) if k_b > *k_a => {
log::trace!("src/node.rs:508");
@@ -522,18 +521,20 @@
}
(Some((k_a, Some(_))), Some((k_b, _))) if k_b < *k_a => {
log::trace!("iterator returning {:?}", self.next_back_a);
return self.next_back_a.take().map(|(k, v)| {
(KeyRef::Slice(k), v.unwrap().as_ref())
});
return self
.next_back_a
.take()
.map(|(k, v)| (KeyRef::Slice(k), v.unwrap().as_ref()));
}
(Some((k_a, Some(_))), Some((k_b, _))) if k_b == *k_a => {
// prefer overlay, discard node value
self.next_back_b.take();
log::trace!("src/node.rs:520");
log::trace!("iterator returning {:?}", self.next_back_a);
return self.next_back_a.take().map(|(k, v)| {
(KeyRef::Slice(k), v.unwrap().as_ref())
});
return self
.next_back_a
.take()
.map(|(k, v)| (KeyRef::Slice(k), v.unwrap().as_ref()));
}
_ => unreachable!(
"did not expect combination a: {:?} b: {:?}",
@@ -905,8 +906,9 @@ impl Node {
Some(Node { overlay: Default::default(), inner: new_inner })
}

/// `node_kv_pair` returns either the existing (node/key, value, current offset) tuple or
/// (node/key, none, future offset) where a node/key is node level encoded key.
/// `node_kv_pair` returns either the existing (node/key, value, current
/// offset) tuple or (node/key, none, future offset) where a node/key is
/// node level encoded key.
pub(crate) fn node_kv_pair<'a>(
&'a self,
key: &'a [u8],
@@ -949,7 +951,7 @@ impl Node {
return Some((
self.prefix_decode(self.inner.index_key(idx)),
self.inner.index_value(idx).into(),
))
));
}
Err(idx) => idx,
};
@@ -1018,7 +1020,7 @@ impl Node {
return Some((
self.prefix_decode(self.inner.index_key(idx)),
self.inner.index_value(idx).into(),
))
));
}
Err(idx) => idx,
};
@@ -1088,7 +1090,7 @@ impl Node {
let pid_bytes = self.index_value(idx);
let pid = u64::from_le_bytes(pid_bytes.try_into().unwrap());

log::trace!("index_next_node for key {:?} returning pid {} after searching node {:?}", key, pid, self);
log::trace!(
"index_next_node for key {:?} returning pid {} after searching node {:?}",
key,
pid,
self
);
(is_leftmost, pid)
}

@@ -1741,29 +1748,10 @@ impl Inner {
* (tf!(size_of::<usize>(), u32)
- u32::from(self.offset_bytes)));

let mut tmp = std::mem::MaybeUninit::<usize>::uninit();
let len = size_of::<usize>();

// we use unsafe code here because it cuts a significant number of
// CPU cycles on a simple insertion workload compared to using the
// more idiomatic approach of copying the correct number of bytes into
// a buffer initialized with zeroes. the seemingly "less" unsafe
// approach of using ptr::copy_nonoverlapping did not improve matters.
// using a match statement on offset_bytes and performing simpler
// casting for one or two bytes slowed things down due to increasing
// code size. this approach is branch-free and cut CPU usage of this
// function from 7-11% down to 0.5-2% in a monotonic insertion workload.
#[allow(unsafe_code)]
unsafe {
let ptr: *const u8 = self.ptr().add(start);
std::ptr::copy_nonoverlapping(
ptr,
tmp.as_mut_ptr() as *mut u8,
len,
);
*tmp.as_mut_ptr() &= mask;
tmp.assume_init()
}
usize::from_ne_bytes(self.buf()[start..start + len].try_into().unwrap())
& mask
}

fn set_offset(&mut self, index: usize, offset: usize) {
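This hunk is the heart of the PR: per its now-deleted comment, the unsafe `MaybeUninit` plus `copy_nonoverlapping` load was a deliberate optimization that cut this function's CPU share from 7-11% down to 0.5-2% in a monotonic insertion workload. The safe replacement reads `size_of::<usize>()` bytes from the buffer, reinterprets them with `from_ne_bytes`, and masks off the bytes beyond `offset_bytes`. A standalone sketch of that read; the mask derivation is reconstructed from the truncated hunk above, and `buf` and `offset_bytes` stand in for fields of sled's `Inner`:

```rust
use std::convert::TryInto;
use std::mem::size_of;

// Load a usize stored in the low `offset_bytes` bytes at `start`,
// entirely in safe code: slice -> fixed-size array -> from_ne_bytes,
// then mask away the neighboring bytes that share the loaded word.
fn get_offset(buf: &[u8], start: usize, offset_bytes: u32) -> usize {
    let mask = usize::MAX
        >> (8 * (size_of::<usize>() as u32 - offset_bytes));
    let len = size_of::<usize>();
    usize::from_ne_bytes(buf[start..start + len].try_into().unwrap()) & mask
}

fn main() {
    let mut buf = [0u8; 16];
    // store 0x1234 in native byte order at index 3, then read back
    // only its low two bytes
    buf[3..3 + size_of::<usize>()].copy_from_slice(&0x1234usize.to_ne_bytes());
    assert_eq!(get_offset(&buf, 3, 2), 0x1234);
}
```

Unlike the unsafe version, the slice indexing panics rather than reading past the buffer if `start + len` overruns it, which is exactly the trade the PR is making.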
Expand Down Expand Up @@ -2217,10 +2205,17 @@ impl Inner {
{
// search key does not evenly fit based on
// our fixed stride length
log::trace!("failed to find, search: {:?} lo: {:?} \
log::trace!(
"failed to find, search: {:?} lo: {:?} \
prefix_len: {} distance: {} stride: {} offset: {} children: {}, node: {:?}",
key, self.lo(), self.prefix_len, distance,
stride.get(), offset, self.children, self
key,
self.lo(),
self.prefix_len,
distance,
stride.get(),
offset,
self.children,
self
);
return Err((offset + 1).min(self.children()));
}
@@ -2239,7 +2234,7 @@
let mid = left + size / 2;

let l = self.index_key(mid);
let cmp = crate::fastcmp(l.unwrap_slice(), key);
let cmp = l.unwrap_slice().cmp(key);

if cmp == Less {
left = mid + 1;
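With `src/fastcmp.rs` deleted, the comparison falls back to `Ord` on `[u8]`, which the standard library specializes to a `memcmp` call for byte slices. A minimal sketch of the surrounding binary search under that change (simplified from the loop above; names assumed):

```rust
use std::cmp::Ordering::{Equal, Greater, Less};

// Classic binary search over sorted byte-slice keys, using std's
// lexicographic slice comparison where `crate::fastcmp` used to be.
fn search(keys: &[&[u8]], key: &[u8]) -> Result<usize, usize> {
    let (mut left, mut right) = (0, keys.len());
    while left < right {
        let mid = left + (right - left) / 2;
        match keys[mid].cmp(key) {
            Less => left = mid + 1,
            Greater => right = mid,
            Equal => return Ok(mid),
        }
    }
    Err(left) // insertion point when the key is absent
}

fn main() {
    let keys: Vec<&[u8]> = vec![b"apple", b"kiwi", b"pear"];
    assert_eq!(search(&keys, b"kiwi"), Ok(1));
    assert_eq!(search(&keys, b"mango"), Err(2));
}
```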
@@ -2263,19 +2258,19 @@
fn iter_keys(
&self,
) -> impl Iterator<Item = KeyRef<'_>>
+ ExactSizeIterator
+ DoubleEndedIterator
+ Clone {
+ ExactSizeIterator
+ DoubleEndedIterator
+ Clone {
(0..self.children()).map(move |idx| self.index_key(idx))
}

fn iter_index_pids(
&self,
) -> impl '_
+ Iterator<Item = u64>
+ ExactSizeIterator
+ DoubleEndedIterator
+ Clone {
+ Iterator<Item = u64>
+ ExactSizeIterator
+ DoubleEndedIterator
+ Clone {
assert!(self.is_index);
self.iter_values().map(move |pid_bytes| {
u64::from_le_bytes(pid_bytes.try_into().unwrap())
@@ -2308,21 +2303,13 @@
pub(crate) fn hi(&self) -> Option<&[u8]> {
let start = tf!(self.lo_len) + size_of::<Header>();
let end = start + tf!(self.hi_len);
if start == end {
None
} else {
Some(&self.as_ref()[start..end])
}
if start == end { None } else { Some(&self.as_ref()[start..end]) }
}

fn hi_mut(&mut self) -> Option<&mut [u8]> {
let start = tf!(self.lo_len) + size_of::<Header>();
let end = start + tf!(self.hi_len);
if start == end {
None
} else {
Some(&mut self.as_mut()[start..end])
}
if start == end { None } else { Some(&mut self.as_mut()[start..end]) }
}

fn index_key(&self, idx: usize) -> KeyRef<'_> {
@@ -3000,7 +2987,8 @@ mod test {

#[test]
fn node_bug_02() {
// postmortem: the test code had some issues with handling invalid keys for nodes
// postmortem: the test code had some issues with handling invalid keys
// for nodes
let node = Inner::new(
&[47, 97][..],
None,
@@ -3057,7 +3045,8 @@
#[test]
fn node_bug_05() {
// postmortem: `prop_indexable` did not account for the requirement
// of feeding sorted items that are >= the lo key to the Node::new method.
// of feeding sorted items that are >= the lo key to the Node::new
// method.
assert!(prop_indexable(
vec![1],
vec![],