diff --git a/Cargo.toml b/Cargo.toml index 0e83f1a6f..67bf41dd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,11 +32,23 @@ typenum = { version = "1.16", features = ["const-generics"] } const-default = { version = "1", optional = true, default-features = false } serde = { version = "1.0", optional = true, default-features = false } zeroize = { version = "1", optional = true, default-features = false } +faster-hex = { version = "0.8", optional = true, default-features = false } [dev-dependencies] # this can't yet be made optional, see https://github.com/rust-lang/cargo/issues/1596 serde_json = "1.0" bincode = "1.0" +criterion = { version = "0.5", features = ["html_reports"] } +rand = "0.8" + +[[bench]] +name = "hex" +harness = false + +[profile.bench] +opt-level = 3 +lto = 'fat' +codegen-units = 1 [package.metadata.docs.rs] # all but "internals", don't show those on docs.rs diff --git a/benches/hex.rs b/benches/hex.rs new file mode 100644 index 000000000..ff879c9ae --- /dev/null +++ b/benches/hex.rs @@ -0,0 +1,46 @@ +use criterion::{ + criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, +}; +use generic_array::{typenum::*, ArrayLength, GenericArray}; +use rand::RngCore; + +use std::{fmt::UpperHex, io::Write}; + +fn criterion_benchmark(c: &mut Criterion) { + let mut hex = c.benchmark_group("hex"); + + let mut rng = rand::thread_rng(); + + macro_rules! 
all_hex_benches { + ($($len:ty,)*) => { + $(bench_hex::<$len>(&mut rng, &mut hex);)* + } + } + + all_hex_benches!( + U1, U2, U4, U8, U12, U15, U16, U32, U64, U100, U128, U160, U255, U256, U500, U512, U900, + U1023, U1024, Sum, U2048, U4096, Prod, U10000, + ); + + hex.finish(); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); + +fn bench_hex<N: ArrayLength>(mut rng: impl RngCore, g: &mut BenchmarkGroup<'_, WallTime>) +where + GenericArray<u8, N>: UpperHex, +{ + let mut fixture = Box::<GenericArray<u8, N>>::default(); + rng.fill_bytes(fixture.as_mut_slice()); + + g.bench_function(format!("N{:08}", N::USIZE), |b| { + let mut out = Vec::with_capacity(N::USIZE * 2); + + b.iter(|| { + _ = write!(out, "{:X}", &*fixture); + out.clear(); + }); + }); +} diff --git a/src/hex.rs b/src/hex.rs index a132d6646..c86056c72 100644 --- a/src/hex.rs +++ b/src/hex.rs @@ -18,12 +18,40 @@ use typenum::*; use crate::{ArrayLength, GenericArray}; -static LOWER_CHARS: [u8; 16] = *b"0123456789abcdef"; -static UPPER_CHARS: [u8; 16] = *b"0123456789ABCDEF"; +#[inline(always)] +fn hex_encode_fallback<const UPPER: bool>(src: &[u8], dst: &mut [u8]) { + if dst.len() < src.len() * 2 { + unsafe { core::hint::unreachable_unchecked() }; + } + + let alphabet = match UPPER { + true => b"0123456789ABCDEF", + false => b"0123456789abcdef", + }; + + dst.chunks_exact_mut(2).zip(src).for_each(|(s, c)| { + s[0] = alphabet[(c >> 4) as usize]; + s[1] = alphabet[(c & 0xF) as usize]; + }); +} + +#[inline] +fn hex_encode<const UPPER: bool>(src: &[u8], dst: &mut [u8]) { + debug_assert!(dst.len() >= (src.len() * 2)); -fn generic_hex<N: ArrayLength>( + #[cfg(any(miri, not(feature = "faster-hex")))] + hex_encode_fallback::<UPPER>(src, dst); + + // the `unwrap_unchecked` is to avoid the length checks + #[cfg(all(feature = "faster-hex", not(miri)))] + match UPPER { + true => unsafe { faster_hex::hex_encode_upper(src, dst).unwrap_unchecked() }, + false => unsafe { faster_hex::hex_encode(src, dst).unwrap_unchecked() }, + }; +} + +fn generic_hex<N: ArrayLength, const UPPER: bool>( arr: &GenericArray<u8, N>, - alphabet: &[u8; 16], // use 
fixed-length array to avoid slice index checks f: &mut fmt::Formatter<'_>, ) -> fmt::Result where @@ -36,32 +64,43 @@ where _ => max_digits, }; - let max_hex = (max_digits >> 1) + (max_digits & 1); + // ceil(max_digits / 2) + let max_bytes = (max_digits >> 1) + (max_digits & 1); + + let input = { + // LLVM can't seem to automatically prove this + if max_bytes > N::USIZE { + unsafe { core::hint::unreachable_unchecked() }; + } + + &arr[..max_bytes] + }; if N::USIZE <= 1024 { - // For small arrays use a stack allocated - // buffer of 2x number of bytes - let mut res = GenericArray::<u8, Sum<N, N>>::default(); + // For small arrays use a stack allocated buffer of 2x number of bytes + let mut buf = GenericArray::<u8, Sum<N, N>>::default(); - arr.iter().take(max_hex).enumerate().for_each(|(i, c)| { - res[i * 2] = alphabet[(c >> 4) as usize]; - res[i * 2 + 1] = alphabet[(c & 0xF) as usize]; - }); + if N::USIZE < 16 { + // for the smallest inputs, don't bother limiting to max_bytes, + // just process the entire array. When "faster-hex" is enabled, + // this avoids its logic that winds up going to the fallback anyway + hex_encode_fallback::<UPPER>(arr, &mut buf); + } else { + hex_encode::<UPPER>(input, &mut buf); + } - f.write_str(unsafe { str::from_utf8_unchecked(&res[..max_digits]) })?; + f.write_str(unsafe { str::from_utf8_unchecked(buf.get_unchecked(..max_digits)) })?; } else { // For large array use chunks of up to 1024 bytes (2048 hex chars) let mut buf = [0u8; 2048]; let mut digits_left = max_digits; - for chunk in arr[..max_hex].chunks(1024) { - chunk.iter().enumerate().for_each(|(i, c)| { - buf[i * 2] = alphabet[(c >> 4) as usize]; - buf[i * 2 + 1] = alphabet[(c & 0xF) as usize]; - }); + for chunk in input.chunks(1024) { + hex_encode::<UPPER>(chunk, &mut buf); let n = min(chunk.len() * 2, digits_left); - f.write_str(unsafe { str::from_utf8_unchecked(&buf[..n]) })?; + // SAFETY: n will always be within bounds due to the above min + f.write_str(unsafe { str::from_utf8_unchecked(buf.get_unchecked(..n)) })?; 
digits_left -= n; } } @@ -74,7 +113,7 @@ where Sum<N, N>: ArrayLength, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - generic_hex(self, &LOWER_CHARS, f) + generic_hex::<_, false>(self, f) } } @@ -84,6 +123,6 @@ where Sum<N, N>: ArrayLength, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - generic_hex(self, &UPPER_CHARS, f) + generic_hex::<_, true>(self, f) } }