Improve fallback speed when folded multiply is not available (#134)
* Use pseudo-folded-multiply to simplify code

Signed-off-by: Tom Kaitchuck <Tom.Kaitchuck@gmail.com>
tkaitchuck committed Oct 25, 2022
1 parent e9c6735 commit 8eeeabc
Showing 13 changed files with 1,306 additions and 111 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -148,4 +148,4 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: check
args: --target wasm32-unknown-unknown
args: --target wasm32-unknown-unknown --no-default-features
1 change: 0 additions & 1 deletion build.rs
@@ -20,5 +20,4 @@ fn main() {
{
println!("cargo:rustc-cfg=feature=\"folded_multiply\"");
}

}
1,137 changes: 1,137 additions & 0 deletions smhasher/fallbackNoFoldedOutput.txt

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions src/aes_hash.rs
@@ -1,8 +1,8 @@
use crate::convert::*;
use crate::operations::*;
use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;
use crate::random_state::PI;

/// A `Hasher` for hashing an arbitrary stream of bytes.
///
@@ -68,7 +68,6 @@ impl AHasher {
}
}


#[inline]
pub(crate) fn from_random_state(rand_state: &RandomState) -> Self {
let key1 = [rand_state.k0, rand_state.k1].convert();
@@ -128,7 +127,11 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))]
#[cfg(any(
target_pointer_width = "64",
target_pointer_width = "32",
target_pointer_width = "16"
))]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}
@@ -317,7 +320,7 @@ pub(crate) struct AHasherStr(pub AHasher);
impl Hasher for AHasherStr {
#[inline]
fn finish(&self) -> u64 {
let result : [u64; 2] = self.0.enc.convert();
let result: [u64; 2] = self.0.enc.convert();
result[0]
}

@@ -428,4 +431,3 @@ mod tests {
assert_eq!(bytes, 0x6464646464646464);
}
}

8 changes: 2 additions & 6 deletions src/convert.rs
@@ -7,17 +7,13 @@ macro_rules! convert {
impl Convert<$b> for $a {
#[inline(always)]
fn convert(self) -> $b {
unsafe {
core::mem::transmute::<$a, $b>(self)
}
unsafe { core::mem::transmute::<$a, $b>(self) }
}
}
impl Convert<$a> for $b {
#[inline(always)]
fn convert(self) -> $a {
unsafe {
core::mem::transmute::<$b, $a>(self)
}
unsafe { core::mem::transmute::<$b, $a>(self) }
}
}
};
37 changes: 6 additions & 31 deletions src/fallback_hash.rs
@@ -6,8 +6,6 @@ use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;



const ROT: u32 = 23; //17

/// A `Hasher` for hashing an arbitrary stream of bytes.
@@ -94,19 +92,10 @@ impl AHasher {
/// attacker somehow knew part of (but not all) the contents of the buffer before hand,
/// they would not be able to predict any of the bits in the buffer at the end.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
fn update(&mut self, new_data: u64) {
self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE);
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
fn update(&mut self, new_data: u64) {
let d1 = (new_data ^ self.buffer).wrapping_mul(MULTIPLE);
self.pad = (self.pad ^ d1).rotate_left(8).wrapping_mul(MULTIPLE);
self.buffer = (self.buffer ^ self.pad).rotate_left(24);
}

/// Similar to the above this function performs an update using a "folded multiply".
/// However it takes in 128 bits of data instead of 64. Both halves must be masked.
///
@@ -119,21 +108,12 @@ impl AHasher {
/// can't be changed by the same set of input bits. To cancel this sequence with subsequent input would require
/// knowing the keys.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
fn large_update(&mut self, new_data: u128) {
let block: [u64; 2] = new_data.convert();
let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]);
self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
fn large_update(&mut self, new_data: u128) {
let block: [u64; 2] = new_data.convert();
self.update(block[0] ^ self.extra_keys[0]);
self.update(block[1] ^ self.extra_keys[1]);
}

#[inline]
#[cfg(feature = "specialize")]
fn short_finish(&self) -> u64 {
@@ -171,7 +151,11 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))]
#[cfg(any(
target_pointer_width = "64",
target_pointer_width = "32",
target_pointer_width = "16"
))]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}
@@ -209,18 +193,10 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(feature = "folded_multiply")]
fn finish(&self) -> u64 {
let rot = (self.buffer & 63) as u32;
folded_multiply(self.buffer, self.pad).rotate_left(rot)
}

#[inline]
#[cfg(not(feature = "folded_multiply"))]
fn finish(&self) -> u64 {
let rot = (self.buffer & 63) as u32;
(self.buffer.wrapping_mul(MULTIPLE) ^ self.pad).rotate_left(rot)
}
}

#[cfg(feature = "specialize")]
@@ -339,8 +315,7 @@ impl Hasher for AHasherStr {
self.0.write(bytes)
} else {
let value = read_small(bytes);
self.0.buffer = folded_multiply(value[0] ^ self.0.buffer,
value[1] ^ self.0.extra_keys[1]);
self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, value[1] ^ self.0.extra_keys[1]);
self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64);
}
}
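
The doc comments in the hunks above describe how each 64-bit word and each 128-bit block is folded into the running state, and how the finish step applies a data-dependent rotation. Below is a minimal standalone sketch that mirrors those three steps using the folded_multiply shown later in src/operations.rs; the struct name and all seed, pad, and key values are illustrative placeholders, not the crate's real keying (the real hasher derives them from RandomState).

// Toy model of the fallback hasher's mixing steps shown above. All names and
// seed/key values are illustrative placeholders, not ahash's public API.
const MULTIPLE: u64 = 6364136223846793005;
const ROT: u32 = 23;

fn folded_multiply(s: u64, by: u64) -> u64 {
    // 64x64 -> 128-bit multiply, then XOR the high and low halves together.
    let result = (s as u128).wrapping_mul(by as u128);
    (result as u64) ^ ((result >> 64) as u64)
}

struct ToyHasher {
    buffer: u64,
    pad: u64,
    extra_keys: [u64; 2],
}

impl ToyHasher {
    // Folds one 64-bit word into the buffer (mirrors `update` above).
    fn update(&mut self, new_data: u64) {
        self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE);
    }

    // Folds 128 bits at once with a single folded multiply (mirrors `large_update` above).
    fn large_update(&mut self, new_data: u128) {
        let block = [new_data as u64, (new_data >> 64) as u64];
        let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]);
        self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
    }

    // Data-dependent final rotation (mirrors `finish` above).
    fn finish(&self) -> u64 {
        let rot = (self.buffer & 63) as u32;
        folded_multiply(self.buffer, self.pad).rotate_left(rot)
    }
}

fn main() {
    // Placeholder seeds; shown only to make the sketch runnable.
    let mut h = ToyHasher { buffer: 0x243f6a8885a308d3, pad: 0x13198a2e03707344, extra_keys: [1, 2] };
    h.update(42);
    h.large_update(0x0123456789abcdef_0123456789abcdef_u128);
    println!("{:016x}", h.finish());
}
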
31 changes: 25 additions & 6 deletions src/hash_quality_test.rs
@@ -147,7 +147,13 @@ fn assert_each_byte_differs(num: u64, base: u64, alternitives: Vec<u64>) {
for alternitive in alternitives {
changed_bits |= base ^ alternitive
}
assert_eq!(core::u64::MAX, changed_bits, "Bits changed: {:x} on num: {:?}", changed_bits, num);
assert_eq!(
core::u64::MAX,
changed_bits,
"Bits changed: {:x} on num: {:?}",
changed_bits,
num
);
}

fn test_finish_is_consistent<T: Hasher>(constructor: impl Fn(u128, u128) -> T) {
@@ -273,11 +279,19 @@ fn test_padding_doesnot_collide<T: Hasher>(hasher: impl Fn() -> T) {
let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish());
assert!(
same_bytes <= 3,
"{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish()
"{} bytes of {} -> {:x} vs {:x}",
num,
c,
value,
long.finish()
);
assert!(
same_nibbles <= 8,
"{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish()
"{} bytes of {} -> {:x} vs {:x}",
num,
c,
value,
long.finish()
);
let flipped_bits = (value ^ long.finish()).count_ones();
assert!(flipped_bits > 10);
@@ -370,7 +384,7 @@ mod fallback_tests {
fn fallback_keys_affect_every_byte() {
//For fallback second key is not used in every hash.
#[cfg(all(not(feature = "specialize"), feature = "folded_multiply"))]
test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte("", |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte((0, 0), |a, b| AHasher::new_with_keys(a ^ b, a));
}
@@ -397,7 +411,12 @@ mod fallback_tests {
///Basic sanity tests of the cypto properties of aHash.
#[cfg(any(
all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)),
all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd")
all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
)
))]
#[cfg(test)]
mod aes_tests {
@@ -460,7 +479,7 @@ mod aes_tests {
#[test]
fn aes_keys_affect_every_byte() {
#[cfg(not(feature = "specialize"))]
test_keys_affect_every_byte(0, AHasher::test_with_keys);
test_keys_affect_every_byte(0, AHasher::test_with_keys);
test_keys_affect_every_byte("", AHasher::test_with_keys);
test_keys_affect_every_byte((0, 0), AHasher::test_with_keys);
}
18 changes: 12 additions & 6 deletions src/lib.rs
@@ -8,8 +8,10 @@
//!
//! aHash uses the hardware AES instruction on x86 processors to provide a keyed hash function.
//! aHash is not a cryptographically secure hash.
//!
#![cfg_attr(any(feature = "compile-time-rng", feature = "runtime-rng"), doc = r##"
//!
#![cfg_attr(
any(feature = "compile-time-rng", feature = "runtime-rng"),
doc = r##"
# Example
```
use ahash::{AHasher, RandomState};
@@ -18,8 +20,11 @@ use std::collections::HashMap;
let mut map: HashMap<i32, i32, RandomState> = HashMap::default();
map.insert(12, 34);
```
"##)]
#![cfg_attr(feature = "std", doc = r##"
"##
)]
#![cfg_attr(
feature = "std",
doc = r##"
For convenience, both new-type wrappers and type aliases are provided. The new type wrappers are called called `AHashMap` and `AHashSet`. These do the same thing with slightly less typing.
The type aliases are called `ahash::HashMap`, `ahash::HashSet` are also provided and alias the
std::[HashMap] and std::[HashSet]. Why are there two options? The wrappers are convenient but
@@ -39,7 +44,8 @@ map.insert(12, 34);
let mut set = ahash::HashSet::with_capacity(10);
set.insert(10);
```
"##)]
"##
)]
#![deny(clippy::correctness, clippy::complexity, clippy::perf)]
#![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)]
#![cfg_attr(all(not(test), not(feature = "std")), no_std)]
@@ -265,10 +271,10 @@ impl<B: BuildHasher> BuildHasherExt for B {
#[cfg(test)]
mod test {
use crate::convert::Convert;
use crate::specialize::CallHasher;
use crate::*;
use std::collections::HashMap;
use std::hash::Hash;
use crate::specialize::CallHasher;

#[test]
fn test_ahash_alias_map_construction() {
32 changes: 25 additions & 7 deletions src/operations.rs
@@ -1,6 +1,6 @@
use crate::convert::*;

///This constant come from Kunth's prng (Empirically it works better than those from splitmix32).
///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32).
pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties found by automated search.
@@ -11,11 +11,19 @@ const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;

#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
let result = (s as u128).wrapping_mul(by as u128);
((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
let b1 = s.wrapping_mul(by.swap_bytes());
let b2 = s.swap_bytes().wrapping_mul(!by);
b1 ^ b2.swap_bytes()
}
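
This pseudo-folded-multiply is the core of the change: instead of keeping separate cfg-gated mixing paths in fallback_hash.rs, folded_multiply itself now has a variant built from two 64-bit multiplies and byte swaps for targets without a fast 128-bit multiply. A small standalone comparison sketch of the two variants shown above (function names and input values are chosen purely for illustration; the pseudo variant is an alternative mixer, not a bit-identical replacement):

// Standalone comparison of the two folded_multiply variants shown above.
// Names and inputs are illustrative only.
const fn folded_multiply_true(s: u64, by: u64) -> u64 {
    // Widen to u128, multiply, then fold the high half back onto the low half.
    let result = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

const fn folded_multiply_pseudo(s: u64, by: u64) -> u64 {
    // Two 64-bit multiplies with byte swaps stand in for the 128-bit product fold.
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}

fn main() {
    let (s, by) = (0x0123_4567_89ab_cdef_u64, 6364136223846793005_u64);
    println!("true:   {:016x}", folded_multiply_true(s, by));
    println!("pseudo: {:016x}", folded_multiply_pseudo(s, by));
}
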

/// Given a small (less than 8 byte slice) returns the same data stored in two u32s.
/// (order of and non-duplication of bytes is NOT guaranteed)
@@ -104,14 +112,19 @@ pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
}
}

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd"))]
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
use core::mem::transmute;
unsafe {
let value = transmute(value);
Expand All @@ -134,14 +147,19 @@ pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
}
}

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd"))]
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
use core::mem::transmute;
unsafe {
let value = transmute(value);
