Improve fallback speed when folded multiply is not available (#134)
* Use pseudo-folded-multiply to simplify code

Signed-off-by: Tom Kaitchuck <Tom.Kaitchuck@gmail.com>
tkaitchuck committed Oct 25, 2022
1 parent e9c6735 commit 8eeeabc
Showing 13 changed files with 1,306 additions and 111 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -148,4 +148,4 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: check
args: --target wasm32-unknown-unknown
args: --target wasm32-unknown-unknown --no-default-features
1 change: 0 additions & 1 deletion build.rs
@@ -20,5 +20,4 @@ fn main() {
{
println!("cargo:rustc-cfg=feature=\"folded_multiply\"");
}

}
1,137 changes: 1,137 additions & 0 deletions smhasher/fallbackNoFoldedOutput.txt

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions src/aes_hash.rs
@@ -1,8 +1,8 @@
use crate::convert::*;
use crate::operations::*;
use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;
use crate::random_state::PI;

/// A `Hasher` for hashing an arbitrary stream of bytes.
///
@@ -68,7 +68,6 @@ impl AHasher {
}
}


#[inline]
pub(crate) fn from_random_state(rand_state: &RandomState) -> Self {
let key1 = [rand_state.k0, rand_state.k1].convert();
@@ -128,7 +127,11 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))]
#[cfg(any(
target_pointer_width = "64",
target_pointer_width = "32",
target_pointer_width = "16"
))]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}
@@ -317,7 +320,7 @@ pub(crate) struct AHasherStr(pub AHasher);
impl Hasher for AHasherStr {
#[inline]
fn finish(&self) -> u64 {
let result : [u64; 2] = self.0.enc.convert();
let result: [u64; 2] = self.0.enc.convert();
result[0]
}

@@ -428,4 +431,3 @@ mod tests {
assert_eq!(bytes, 0x6464646464646464);
}
}

8 changes: 2 additions & 6 deletions src/convert.rs
@@ -7,17 +7,13 @@ macro_rules! convert {
impl Convert<$b> for $a {
#[inline(always)]
fn convert(self) -> $b {
unsafe {
core::mem::transmute::<$a, $b>(self)
}
unsafe { core::mem::transmute::<$a, $b>(self) }
}
}
impl Convert<$a> for $b {
#[inline(always)]
fn convert(self) -> $a {
unsafe {
core::mem::transmute::<$b, $a>(self)
}
unsafe { core::mem::transmute::<$b, $a>(self) }
}
}
};
37 changes: 6 additions & 31 deletions src/fallback_hash.rs
@@ -6,8 +6,6 @@ use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;



const ROT: u32 = 23; //17

/// A `Hasher` for hashing an arbitrary stream of bytes.
@@ -94,19 +92,10 @@ impl AHasher {
/// attacker somehow knew part of (but not all) the contents of the buffer before hand,
/// they would not be able to predict any of the bits in the buffer at the end.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
fn update(&mut self, new_data: u64) {
self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE);
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
fn update(&mut self, new_data: u64) {
let d1 = (new_data ^ self.buffer).wrapping_mul(MULTIPLE);
self.pad = (self.pad ^ d1).rotate_left(8).wrapping_mul(MULTIPLE);
self.buffer = (self.buffer ^ self.pad).rotate_left(24);
}

/// Similar to the above this function performs an update using a "folded multiply".
/// However it takes in 128 bits of data instead of 64. Both halves must be masked.
///
@@ -119,21 +108,12 @@ impl AHasher {
/// can't be changed by the same set of input bits. To cancel this sequence with subsequent input would require
/// knowing the keys.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
fn large_update(&mut self, new_data: u128) {
let block: [u64; 2] = new_data.convert();
let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]);
self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
fn large_update(&mut self, new_data: u128) {
let block: [u64; 2] = new_data.convert();
self.update(block[0] ^ self.extra_keys[0]);
self.update(block[1] ^ self.extra_keys[1]);
}

#[inline]
#[cfg(feature = "specialize")]
fn short_finish(&self) -> u64 {
@@ -171,7 +151,11 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))]
#[cfg(any(
target_pointer_width = "64",
target_pointer_width = "32",
target_pointer_width = "16"
))]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}
@@ -209,18 +193,10 @@ impl Hasher for AHasher {
}

#[inline]
#[cfg(feature = "folded_multiply")]
fn finish(&self) -> u64 {
let rot = (self.buffer & 63) as u32;
folded_multiply(self.buffer, self.pad).rotate_left(rot)
}

#[inline]
#[cfg(not(feature = "folded_multiply"))]
fn finish(&self) -> u64 {
let rot = (self.buffer & 63) as u32;
(self.buffer.wrapping_mul(MULTIPLE) ^ self.pad).rotate_left(rot)
}
}

#[cfg(feature = "specialize")]
@@ -339,8 +315,7 @@ impl Hasher for AHasherStr {
self.0.write(bytes)
} else {
let value = read_small(bytes);
self.0.buffer = folded_multiply(value[0] ^ self.0.buffer,
value[1] ^ self.0.extra_keys[1]);
self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, value[1] ^ self.0.extra_keys[1]);
self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64);
}
}
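
The doc comments in the hunks above describe how each 64-bit word and each 128-bit block is folded into the running state, and how the finish step applies a data-dependent rotation. Below is a minimal standalone sketch that mirrors those three steps using the folded_multiply shown later in src/operations.rs; the struct name and all seed, pad, and key values are illustrative placeholders, not the crate's real keying (the real hasher derives them from RandomState).

// Toy model of the fallback hasher's mixing steps shown above. All names and
// seed/key values are illustrative placeholders, not ahash's public API.
const MULTIPLE: u64 = 6364136223846793005;
const ROT: u32 = 23;

fn folded_multiply(s: u64, by: u64) -> u64 {
    // 64x64 -> 128-bit multiply, then XOR the high and low halves together.
    let result = (s as u128).wrapping_mul(by as u128);
    (result as u64) ^ ((result >> 64) as u64)
}

struct ToyHasher {
    buffer: u64,
    pad: u64,
    extra_keys: [u64; 2],
}

impl ToyHasher {
    // Folds one 64-bit word into the buffer (mirrors `update` above).
    fn update(&mut self, new_data: u64) {
        self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE);
    }

    // Folds 128 bits at once with a single folded multiply (mirrors `large_update` above).
    fn large_update(&mut self, new_data: u128) {
        let block = [new_data as u64, (new_data >> 64) as u64];
        let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]);
        self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
    }

    // Data-dependent final rotation (mirrors `finish` above).
    fn finish(&self) -> u64 {
        let rot = (self.buffer & 63) as u32;
        folded_multiply(self.buffer, self.pad).rotate_left(rot)
    }
}

fn main() {
    // Placeholder seeds; shown only to make the sketch runnable.
    let mut h = ToyHasher { buffer: 0x243f6a8885a308d3, pad: 0x13198a2e03707344, extra_keys: [1, 2] };
    h.update(42);
    h.large_update(0x0123456789abcdef_0123456789abcdef_u128);
    println!("{:016x}", h.finish());
}
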
31 changes: 25 additions & 6 deletions src/hash_quality_test.rs
@@ -147,7 +147,13 @@ fn assert_each_byte_differs(num: u64, base: u64, alternitives: Vec<u64>) {
for alternitive in alternitives {
changed_bits |= base ^ alternitive
}
assert_eq!(core::u64::MAX, changed_bits, "Bits changed: {:x} on num: {:?}", changed_bits, num);
assert_eq!(
core::u64::MAX,
changed_bits,
"Bits changed: {:x} on num: {:?}",
changed_bits,
num
);
}

fn test_finish_is_consistent<T: Hasher>(constructor: impl Fn(u128, u128) -> T) {
@@ -273,11 +279,19 @@ fn test_padding_doesnot_collide<T: Hasher>(hasher: impl Fn() -> T) {
let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish());
assert!(
same_bytes <= 3,
"{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish()
"{} bytes of {} -> {:x} vs {:x}",
num,
c,
value,
long.finish()
);
assert!(
same_nibbles <= 8,
"{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish()
"{} bytes of {} -> {:x} vs {:x}",
num,
c,
value,
long.finish()
);
let flipped_bits = (value ^ long.finish()).count_ones();
assert!(flipped_bits > 10);
@@ -370,7 +384,7 @@ mod fallback_tests {
fn fallback_keys_affect_every_byte() {
//For fallback second key is not used in every hash.
#[cfg(all(not(feature = "specialize"), feature = "folded_multiply"))]
test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte("", |a, b| AHasher::new_with_keys(a ^ b, a));
test_keys_affect_every_byte((0, 0), |a, b| AHasher::new_with_keys(a ^ b, a));
}
@@ -397,7 +411,12 @@ mod fallback_tests {
///Basic sanity tests of the cypto properties of aHash.
#[cfg(any(
all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)),
all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd")
all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
)
))]
#[cfg(test)]
mod aes_tests {
@@ -460,7 +479,7 @@ mod aes_tests {
#[test]
fn aes_keys_affect_every_byte() {
#[cfg(not(feature = "specialize"))]
test_keys_affect_every_byte(0, AHasher::test_with_keys);
test_keys_affect_every_byte(0, AHasher::test_with_keys);
test_keys_affect_every_byte("", AHasher::test_with_keys);
test_keys_affect_every_byte((0, 0), AHasher::test_with_keys);
}
18 changes: 12 additions & 6 deletions src/lib.rs
@@ -8,8 +8,10 @@
//!
//! aHash uses the hardware AES instruction on x86 processors to provide a keyed hash function.
//! aHash is not a cryptographically secure hash.
//!
#![cfg_attr(any(feature = "compile-time-rng", feature = "runtime-rng"), doc = r##"
//!
#![cfg_attr(
any(feature = "compile-time-rng", feature = "runtime-rng"),
doc = r##"
# Example
```
use ahash::{AHasher, RandomState};
@@ -18,8 +20,11 @@ use std::collections::HashMap;
let mut map: HashMap<i32, i32, RandomState> = HashMap::default();
map.insert(12, 34);
```
"##)]
#![cfg_attr(feature = "std", doc = r##"
"##
)]
#![cfg_attr(
feature = "std",
doc = r##"
For convenience, both new-type wrappers and type aliases are provided. The new type wrappers are called called `AHashMap` and `AHashSet`. These do the same thing with slightly less typing.
The type aliases are called `ahash::HashMap`, `ahash::HashSet` are also provided and alias the
std::[HashMap] and std::[HashSet]. Why are there two options? The wrappers are convenient but
@@ -39,7 +44,8 @@ map.insert(12, 34);
let mut set = ahash::HashSet::with_capacity(10);
set.insert(10);
```
"##)]
"##
)]
#![deny(clippy::correctness, clippy::complexity, clippy::perf)]
#![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)]
#![cfg_attr(all(not(test), not(feature = "std")), no_std)]
@@ -265,10 +271,10 @@ impl<B: BuildHasher> BuildHasherExt for B {
#[cfg(test)]
mod test {
use crate::convert::Convert;
use crate::specialize::CallHasher;
use crate::*;
use std::collections::HashMap;
use std::hash::Hash;
use crate::specialize::CallHasher;

#[test]
fn test_ahash_alias_map_construction() {
32 changes: 25 additions & 7 deletions src/operations.rs
@@ -1,6 +1,6 @@
use crate::convert::*;

///This constant come from Kunth's prng (Empirically it works better than those from splitmix32).
///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32).
pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties found by automated search.
@@ -11,11 +11,19 @@ const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;

#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
let result = (s as u128).wrapping_mul(by as u128);
((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
let b1 = s.wrapping_mul(by.swap_bytes());
let b2 = s.swap_bytes().wrapping_mul(!by);
b1 ^ b2.swap_bytes()
}
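
This pseudo-folded-multiply is the core of the change: instead of keeping separate cfg-gated mixing paths in fallback_hash.rs, folded_multiply itself now has a variant built from two 64-bit multiplies and byte swaps for targets without a fast 128-bit multiply. A small standalone comparison sketch of the two variants shown above (function names and input values are chosen purely for illustration; the pseudo variant is an alternative mixer, not a bit-identical replacement):

// Standalone comparison of the two folded_multiply variants shown above.
// Names and inputs are illustrative only.
const fn folded_multiply_true(s: u64, by: u64) -> u64 {
    // Widen to u128, multiply, then fold the high half back onto the low half.
    let result = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

const fn folded_multiply_pseudo(s: u64, by: u64) -> u64 {
    // Two 64-bit multiplies with byte swaps stand in for the 128-bit product fold.
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}

fn main() {
    let (s, by) = (0x0123_4567_89ab_cdef_u64, 6364136223846793005_u64);
    println!("true:   {:016x}", folded_multiply_true(s, by));
    println!("pseudo: {:016x}", folded_multiply_pseudo(s, by));
}
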

/// Given a small (less than 8 byte slice) returns the same data stored in two u32s.
/// (order of and non-duplication of bytes is NOT guaranteed)
@@ -104,14 +112,19 @@ pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
}
}

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd"))]
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
use core::mem::transmute;
unsafe {
let value = transmute(value);
Expand All @@ -134,14 +147,19 @@ pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
}
}

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd"))]
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_feature = "aes", target_feature = "crypto"),
not(miri),
feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
use core::mem::transmute;
unsafe {
let value = transmute(value);
