From 8dec5096dff29b2dce352cf06f7d897284388adb Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Fri, 8 Mar 2024 22:31:51 -0800 Subject: [PATCH 1/3] Expand capability of off-path specialized hasher (#221) Signed-off-by: Tom Kaitchuck --- src/aes_hash.rs | 41 ++++++++++++++++++++++++++++++++--------- src/fallback_hash.rs | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/src/aes_hash.rs b/src/aes_hash.rs index 5ba295d..ffe0880 100644 --- a/src/aes_hash.rs +++ b/src/aes_hash.rs @@ -269,13 +269,26 @@ impl Hasher for AHasherU64 { } #[inline] - fn write_u128(&mut self, _i: u128) { - unreachable!("Specialized hasher was called with a different type of object") + fn write_u128(&mut self, i: u128) { + let i: [u64; 2] = i.convert(); + self.buffer = folded_multiply(i[0] ^ self.buffer, MULTIPLE); + self.pad = folded_multiply(i[1] ^ self.pad, MULTIPLE); } #[inline] - fn write_usize(&mut self, _i: usize) { - unreachable!("Specialized hasher was called with a different type of object") + #[cfg(any( + target_pointer_width = "64", + target_pointer_width = "32", + target_pointer_width = "16" + ))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); } } @@ -357,19 +370,29 @@ impl Hasher for AHasherStr { fn write_u8(&mut self, _i: u8) {} #[inline] - fn write_u16(&mut self, _i: u16) {} + fn write_u16(&mut self, i: u16) { + self.0.write_u16(i) + } #[inline] - fn write_u32(&mut self, _i: u32) {} + fn write_u32(&mut self, i: u32) { + self.0.write_u32(i) + } #[inline] - fn write_u64(&mut self, _i: u64) {} + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i) + } #[inline] - fn write_u128(&mut self, _i: u128) {} + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i) + } #[inline] - fn write_usize(&mut self, _i: usize) {} + fn write_usize(&mut self, i: usize) { + 
self.0.write_usize(i) + } } #[cfg(test)] diff --git a/src/fallback_hash.rs b/src/fallback_hash.rs index 6d76319..24023e0 100644 --- a/src/fallback_hash.rs +++ b/src/fallback_hash.rs @@ -211,7 +211,6 @@ impl Hasher for AHasherU64 { #[inline] fn finish(&self) -> u64 { folded_multiply(self.buffer, self.pad) - //self.buffer } #[inline] @@ -240,13 +239,26 @@ impl Hasher for AHasherU64 { } #[inline] - fn write_u128(&mut self, _i: u128) { - unreachable!("Specialized hasher was called with a different type of object") + fn write_u128(&mut self, i: u128) { + let i: [u64; 2] = i.convert(); + self.buffer = folded_multiply(i[0] ^ self.buffer, MULTIPLE); + self.pad = folded_multiply(i[1] ^ self.pad, MULTIPLE); } #[inline] - fn write_usize(&mut self, _i: usize) { - unreachable!("Specialized hasher was called with a different type of object") + #[cfg(any( + target_pointer_width = "64", + target_pointer_width = "32", + target_pointer_width = "16" + ))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); } } @@ -324,19 +336,29 @@ impl Hasher for AHasherStr { fn write_u8(&mut self, _i: u8) {} #[inline] - fn write_u16(&mut self, _i: u16) {} + fn write_u16(&mut self, i: u16) { + self.0.write_u16(i) + } #[inline] - fn write_u32(&mut self, _i: u32) {} + fn write_u32(&mut self, i: u32) { + self.0.write_u32(i) + } #[inline] - fn write_u64(&mut self, _i: u64) {} + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i) + } #[inline] - fn write_u128(&mut self, _i: u128) {} + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i) + } #[inline] - fn write_usize(&mut self, _i: usize) {} + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i) + } } #[cfg(test)] From af37d79eb9076c16b8930275bf7efe930fc2621d Mon Sep 17 00:00:00 2001 From: Ralph Giles Date: Tue, 19 Mar 2024 20:11:51 -0700 Subject: [PATCH 2/3] Update to criterion 0.5.1 (#223) * 
Update to criterion 0.5.1 Migrate to the latest version of the benchmark library. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9144c0c..2e8fdce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ once_cell = { version = "1.18.0", default-features = false, features = ["alloc"] [dev-dependencies] no-panic = "0.1.10" -criterion = {version = "0.3.2", features = ["html_reports"] } +criterion = {version = "0.5.1", features = ["html_reports"] } seahash = "4.0" fnv = "1.0.5" fxhash = "0.2.1" From 0a0e4934ff3ca7d4daad4b5d974a85c8ce409f67 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 26 Mar 2024 08:13:08 -0700 Subject: [PATCH 3/3] issue 158: Add SmallState (#226) * Is an optional reduced size state. Signed-off-by: Tom Kaitchuck --- src/aes_hash.rs | 2 +- src/fallback_hash.rs | 3 +- src/lib.rs | 1 + src/random_state.rs | 199 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 181 insertions(+), 24 deletions(-) diff --git a/src/aes_hash.rs b/src/aes_hash.rs index ffe0880..c0aed7d 100644 --- a/src/aes_hash.rs +++ b/src/aes_hash.rs @@ -60,7 +60,7 @@ impl AHasher { } } - #[allow(unused)] // False positive + #[cfg(test)] pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { Self { enc: key1, diff --git a/src/fallback_hash.rs b/src/fallback_hash.rs index 24023e0..ca465db 100644 --- a/src/fallback_hash.rs +++ b/src/fallback_hash.rs @@ -41,7 +41,8 @@ impl AHasher { } } - #[allow(unused)] // False positive + #[inline] + #[cfg(test)] pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { let key1: [u64; 2] = key1.convert(); let key2: [u64; 2] = key2.convert(); diff --git a/src/lib.rs b/src/lib.rs index c0173cb..66af806 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,6 +143,7 @@ pub mod random_state; mod specialize; pub use crate::random_state::RandomState; +pub use crate::random_state::SmallState; use core::hash::BuildHasher; use core::hash::Hash; diff --git 
a/src/random_state.rs b/src/random_state.rs index d0981c5..ebd12fa 100644 --- a/src/random_state.rs +++ b/src/random_state.rs @@ -30,6 +30,8 @@ use core::fmt; use core::hash::BuildHasher; use core::hash::Hasher; use core::marker::PhantomData; +use crate::convert::Convert; +use crate::operations::{folded_multiply}; pub(crate) const PI: [u64; 4] = [ 0x243f_6a88_85a3_08d3, @@ -154,14 +156,14 @@ impl RandomSource for DefaultRandomSource { fn gen_hasher_seed(&self) -> usize { let stack = self as *const _ as usize; let previous = self.counter.load(Ordering::Relaxed); - let new = previous.wrapping_add(stack); + let new = previous.wrapping_add(stack | 1); self.counter.store(new, Ordering::Relaxed); new } } else { fn gen_hasher_seed(&self) -> usize { let stack = self as *const _ as usize; - self.counter.fetch_add(stack, Ordering::Relaxed) + self.counter.fetch_add(stack | 1, Ordering::Relaxed) } } } @@ -254,12 +256,27 @@ pub struct RandomState { _h: PhantomData, } +/// Provides a Hasher factory similar to [RandomState] that uses less memory at the cost +/// of a slower `build_hasher` function. (Which is generally called once per item hashed) +/// In general [RandomState] should be preferred unless there is a need for reduced memory use. +#[derive(Clone)] +pub struct SmallState { + key: u64, + _h: PhantomData, +} + impl fmt::Debug for RandomState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.pad("RandomState { .. }") } } +impl fmt::Debug for SmallState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("SmallState { .. }") + } +} + impl RandomState { /// Create a new `RandomState` `BuildHasher` using random keys. 
/// @@ -269,7 +286,8 @@ impl RandomState { pub fn new() -> RandomState { let src = get_src(); let fixed = get_fixed_seeds(); - Self::from_keys(&fixed[0], &fixed[1], src.gen_hasher_seed()) + let mixed = Self::pre_mix_key(&fixed[0], src.gen_hasher_seed()); + Self::from_keys(&fixed[0], &fixed[1], mixed) } /// Create a new `RandomState` `BuildHasher` based on the provided seeds, but in such a way @@ -285,28 +303,30 @@ impl RandomState { pub fn generate_with(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { let src = get_src(); let fixed = get_fixed_seeds(); - RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], src.gen_hasher_seed()) - } - - fn from_keys(a: &[u64; 4], b: &[u64; 4], c: usize) -> RandomState { - let &[k0, k1, k2, k3] = a; - let mut hasher = AHasher::from_random_state(&RandomState { k0, k1, k2, k3, _h: PhantomData:: }); - hasher.write_usize(c); - let mix = |l: u64, r: u64| { - let mut h = hasher.clone(); - h.write_u64(l); - h.write_u64(r); - h.finish() - }; + let mixed = Self::pre_mix_key(&fixed[0], src.gen_hasher_seed()); + RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], mixed) + } + + #[inline] + fn pre_mix_key(a: &[u64; 4], c: usize) -> u64 { + let &[k0, k1, _k2, _k3] = a; + folded_multiply(k0 ^ c as u64, k1) + } + + #[inline] + fn from_keys(a: &[u64; 4], b: &[u64; 4], pre_mixed_key: u64) -> RandomState { + let &[_k0, _k1, k2, k3] = a; + let c1 = folded_multiply(pre_mixed_key, k2); + let c2 = folded_multiply(pre_mixed_key, k3); RandomState { - k0: mix(b[0], b[2]), - k1: mix(b[1], b[3]), - k2: mix(b[2], b[1]), - k3: mix(b[3], b[0]), + k0: (c1 ^ b[0]).wrapping_add(b[2]), + k1: (c1 ^ b[1]).wrapping_add(b[3]), + k2: (c2 ^ b[2]).wrapping_add(b[1]), + k3: (c2 ^ b[3]).wrapping_add(b[0]), _h: PhantomData::default(), } } - + /// Internal. Used by Default. 
#[inline] pub(crate) fn with_fixed_keys() -> RandomState { @@ -324,7 +344,8 @@ impl RandomState { #[inline] pub fn with_seed(key: usize) -> RandomState { let fixed = get_fixed_seeds(); - RandomState::from_keys(&fixed[0], &fixed[1], key) + let mixed = RandomState::::pre_mix_key(&fixed[0], key); + RandomState::from_keys(&fixed[0], &fixed[1], mixed) } /// Allows for explicitly setting the seeds to used. @@ -375,6 +396,39 @@ impl RandomState { } } +impl SmallState { + /// Create a new `SmallState` `BuildHasher` using random keys. + /// + /// Each instance will have a unique set of keys derived from [RandomSource]. + /// + #[inline] + pub fn new() -> SmallState { + let fixed = get_fixed_seeds(); + let mixed = RandomState::::pre_mix_key(&fixed[0], get_src().gen_hasher_seed()); + SmallState { + key: mixed, + _h: Default::default(), + } + } + + /// Build a `SmallState` from a single key. The provided key does not need to be of high quality, + /// but all `SmallState`s created from the same key will produce identical hashers. + /// (In contrast to `new` above) + /// + /// This allows for explicitly setting the seed to be used. + /// + /// Note: This method does not require the provided seed to be strong. + #[inline] + pub fn with_seed(key: usize) -> SmallState { + let fixed = get_fixed_seeds(); + let mixed = RandomState::::pre_mix_key(&fixed[0], key); + SmallState { + key: mixed, + _h: Default::default(), + } + } +} + /// Creates an instance of RandomState using keys obtained from the random number generator. /// Each instance created in this way will have a unique set of keys. (But the resulting instance /// can be used to create many hashers each or which will have the same keys.) @@ -392,6 +446,19 @@ impl Default for RandomState { } } +/// Creates an instance of SmallState using keys obtained from the random number generator. +/// Each instance created in this way will have a unique set of keys. 
(But the resulting instance +/// can be used to create many hashers each of which will have the same keys.) +/// +/// This is the same as [SmallState::new()] +#[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] +impl Default for SmallState { + #[inline] + fn default() -> Self { + Self::new() + } +} + impl BuildHasher for RandomState { type Hasher = AHasher; @@ -478,6 +545,94 @@ impl BuildHasher for RandomState { } } +impl BuildHasher for SmallState { + type Hasher = AHasher; + + /// Constructs a new [AHasher] with keys based on this [SmallState] object. + /// This means that two different [SmallState]s will generate + /// [AHasher]s that will return different hashcodes, but [Hasher]s created from the same [BuildHasher] + /// will generate the same hashes for the same input data. + /// + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use ahash::{AHasher, SmallState}; + use std::hash::{Hasher, BuildHasher}; + + let build_hasher = SmallState::::new(); + let mut hasher_1 = build_hasher.build_hasher(); + let mut hasher_2 = build_hasher.build_hasher(); + + hasher_1.write_u32(1234); + hasher_2.write_u32(1234); + + assert_eq!(hasher_1.finish(), hasher_2.finish()); + + let other_build_hasher = SmallState::::new(); + let mut different_hasher = other_build_hasher.build_hasher(); + different_hasher.write_u32(1234); + assert_ne!(different_hasher.finish(), hasher_1.finish()); +``` + "## + )] + /// [Hasher]: std::hash::Hasher + /// [BuildHasher]: std::hash::BuildHasher + /// [HashMap]: std::collections::HashMap + #[inline] + fn build_hasher(&self) -> AHasher { + let fixed = get_fixed_seeds(); + AHasher::from_random_state(&RandomState::::from_keys(&fixed[0], &fixed[1], self.key)) + } + + /// Calculates the hash of a single value. 
This provides a more convenient (and faster) way to obtain a hash: + /// For example: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::BuildHasher; + use ahash::SmallState; + + let hash_builder = SmallState::::new(); + let hash = hash_builder.hash_one("Some Data"); +``` + "## + )] + /// This is similar to: + #[cfg_attr( + feature = "std", + doc = r##" # Examples +``` + use std::hash::{BuildHasher, Hash, Hasher}; + use ahash::SmallState; + + let hash_builder = SmallState::::new(); + let mut hasher = hash_builder.build_hasher(); + "Some Data".hash(&mut hasher); + let hash = hasher.finish(); +``` + "## + )] + /// (Note that these two ways to get a hash may not produce the same value for the same data) + /// + /// This is intended as a convenience for code which *consumes* hashes, such + /// as the implementation of a hash table or in unit tests that check + /// whether a custom [`Hash`] implementation behaves as expected. + /// + /// This must not be used in any code which *creates* hashes, such as in an + /// implementation of [`Hash`]. The way to create a combined hash of + /// multiple values is to call [`Hash::hash`] multiple times using the same + /// [`Hasher`], not to call this method repeatedly and combine the results. + #[cfg(feature = "specialize")] + #[inline] + fn hash_one(&self, x: V) -> u64 { + use crate::specialize::CallHasher; + let fixed = get_fixed_seeds(); + T::get_hash(&x, &RandomState::::from_keys(&fixed[0], &fixed[1], self.key)) + } +} + #[cfg(test)] mod test { use super::*;