Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof of Concept: VAES Support #144

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ no-rng = []
# in case this is being used on an architecture lacking core::sync::atomic::AtomicUsize and friends
atomic-polyfill = [ "dep:atomic-polyfill", "once_cell/atomic-polyfill"]

# Use VAES extension if possible. The hash value may be incompatible with NON-VAES targets
vaes = []
Comment on lines +46 to +47
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use cfg to detect the feature and don't need a feature declared here.


[[bench]]
name = "ahash"
path = "tests/bench.rs"
Expand Down
23 changes: 20 additions & 3 deletions src/aes_hash.rs → src/aes_hash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
feature = "vaes",
target_feature = "avx512vaes",
not(miri)
))]
mod vaes;

/// A `Hasher` for hashing an arbitrary stream of bytes.
///
/// Instances of [`AHasher`] represent state that is updated while hashing data.
Expand Down Expand Up @@ -47,6 +55,7 @@ impl AHasher {
///
/// println!("Hash is {:x}!", hasher.finish());
/// ```
#[allow(dead_code)]
#[inline]
pub(crate) fn new_with_keys(key1: u128, key2: u128) -> Self {
let pi: [u128; 2] = PI.convert();
Expand Down Expand Up @@ -80,13 +89,13 @@ impl AHasher {
}

#[inline(always)]
fn hash_in(&mut self, new_value: u128) {
pub(crate) fn hash_in(&mut self, new_value: u128) {
self.enc = aesenc(self.enc, new_value);
self.sum = shuffle_and_add(self.sum, new_value);
}

#[inline(always)]
fn hash_in_2(&mut self, v1: u128, v2: u128) {
pub(crate) fn hash_in_2(&mut self, v1: u128, v2: u128) {
self.enc = aesenc(self.enc, v1);
self.sum = shuffle_and_add(self.sum, v1);
self.enc = aesenc(self.enc, v2);
Expand Down Expand Up @@ -160,6 +169,15 @@ impl Hasher for AHasher {
self.hash_in(value.convert());
} else {
if data.len() > 32 {
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
feature = "vaes",
target_feature = "avx512vaes",
not(miri)
))]
if data.len() > 128 {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than adding another 'if' I think a cleaner way to handle this would be to add a function to factor out a method in operations. Like there could be aesenc_x4 which then uses cfg to provide one implementation or the other depending on if the cpu instruction is available.

return vaes::hash_batch_128b(&mut data, self);
}
if data.len() > 64 {
let tail = data.read_last_u128x4();
let mut current: [u128; 4] = [self.key; 4];
Expand Down Expand Up @@ -361,7 +379,6 @@ impl Hasher for AHasherStr {

#[cfg(test)]
mod tests {
use super::*;
use crate::convert::Convert;
use crate::operations::aesenc;
use crate::RandomState;
Expand Down
156 changes: 156 additions & 0 deletions src/aes_hash/vaes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use crate::convert::Convert;
use crate::operations::{add_by_64s, aesenc};

use super::AHasher;

mod intrinsic {
#[cfg(target_arch = "x86")]
pub use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
pub use core::arch::x86_64::*;
}

const SHUFFLE_MASKS: [u64; 2] = [0x020a0700_0c01030e_u64, 0x050f0d08_06090b04_u64];

#[derive(Copy, Clone)]
#[repr(transparent)]
struct Avx256(intrinsic::__m256i);

trait ReadFromSliceExt {
fn read_last_avx256x4(&self) -> [Avx256; 4];
fn read_avx256x4(&self) -> ([Avx256; 4], &Self);
}

impl ReadFromSliceExt for [u8] {
#[inline(always)]
fn read_last_avx256x4(&self) -> [Avx256; 4] {
use intrinsic::_mm256_loadu_si256;
let ptr = self.as_ptr();
let offset = self.len() as isize - 128;
unsafe {
[
Avx256(_mm256_loadu_si256(ptr.offset(offset + 0 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(offset + 1 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(offset + 2 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(offset + 3 * 32) as *const _)),
]
}
}

#[inline(always)]
fn read_avx256x4(&self) -> ([Avx256; 4], &Self) {
use intrinsic::_mm256_loadu_si256;
let (value, rest) = self.split_at(128);
let ptr = value.as_ptr();
let array = unsafe {
[
Avx256(_mm256_loadu_si256(ptr.offset(0 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(1 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(2 * 32) as *const _)),
Avx256(_mm256_loadu_si256(ptr.offset(3 * 32) as *const _)),
]
};
(array, rest)
}
}

// Rust is confused with targets supporting VAES without AVX512 extensions.
// We need to manually specify the underlying intrinsic; otherwise the compiler
// will have trouble inlining the code.
Comment on lines +57 to +59
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a link to an issue on this?

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.aesni.aesenc.256"]
fn aesenc_256(a: Avx256, round_key: Avx256) -> Avx256;
}

impl Avx256 {
#[inline(always)]
fn aesenc(self, xor: Self) -> Self {
unsafe { aesenc_256(self, xor) }
}
#[inline(always)]
fn add_by_64s(self, other: Self) -> Self {
use intrinsic::_mm256_add_epi64;
Self(unsafe { _mm256_add_epi64(self.0, other.0) })
}
#[inline(always)]
fn shuffle(self) -> Self {
use intrinsic::{_mm256_set_epi64x, _mm256_shuffle_epi8};
unsafe {
let mask = _mm256_set_epi64x(
SHUFFLE_MASKS[0] as _,
SHUFFLE_MASKS[1] as _,
SHUFFLE_MASKS[0] as _,
SHUFFLE_MASKS[1] as _,
);
Self(_mm256_shuffle_epi8(self.0, mask))
}
}
#[inline(always)]
fn shuffle_and_add(self, other: Self) -> Self {
self.shuffle().add_by_64s(other)
}
#[inline(always)]
fn from_u128(data: u128) -> Self {
use core::mem::transmute;
use intrinsic::_mm256_set_m128i;
Self(unsafe { _mm256_set_m128i(transmute(data), transmute(data)) })
}
#[inline(always)]
fn to_u128x2(self) -> [u128; 2] {
use core::mem::transmute;
use intrinsic::_mm256_extracti128_si256;
unsafe {
[
transmute(_mm256_extracti128_si256::<0>(self.0)),
transmute(_mm256_extracti128_si256::<1>(self.0)),
]
}
}
}

#[inline(never)]
pub(crate) fn hash_batch_128b(data: &mut &[u8], hasher: &mut AHasher) {
let tail = data.read_last_avx256x4();
let duplicated_key = Avx256::from_u128(hasher.key);
let mut current: [Avx256; 4] = [duplicated_key; 4];
current[0] = current[0].aesenc(tail[0]);
current[1] = current[1].aesenc(tail[1]);
current[2] = current[2].aesenc(tail[2]);
current[3] = current[3].aesenc(tail[3]);
let mut sum: [Avx256; 2] = [duplicated_key, duplicated_key];
sum[0] = sum[0].add_by_64s(tail[0]);
sum[0] = sum[0].shuffle_and_add(tail[1]);
sum[1] = sum[1].add_by_64s(tail[2]);
sum[1] = sum[1].shuffle_and_add(tail[3]);
while data.len() > 128 {
let (blocks, rest) = data.read_avx256x4();
current[0] = current[0].aesenc(blocks[0]);
current[1] = current[1].aesenc(blocks[1]);
current[2] = current[2].aesenc(blocks[2]);
current[3] = current[3].aesenc(blocks[3]);
sum[0] = sum[0].shuffle_and_add(blocks[0]);
sum[1] = sum[1].shuffle_and_add(blocks[1]);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps sum[0]

sum[0] = sum[0].shuffle_and_add(blocks[2]);
sum[1] = sum[1].shuffle_and_add(blocks[3]);
*data = rest;
}
let current = [
current[0].to_u128x2(),
current[1].to_u128x2(),
current[2].to_u128x2(),
current[3].to_u128x2(),
];
let sum = [sum[0].to_u128x2(), sum[1].to_u128x2()];

hasher.hash_in_2(
aesenc(current[0][0], current[0][1]),
aesenc(current[1][0], current[1][1]),
);
hasher.hash_in(add_by_64s(sum[0][0].convert(), sum[0][1].convert()).convert());
hasher.hash_in_2(
aesenc(current[2][0], current[2][1]),
aesenc(current[3][0], current[3][1]),
);
hasher.hash_in(add_by_64s(sum[1][0].convert(), sum[1][1].convert()).convert());
}
3 changes: 1 addition & 2 deletions src/hash_map.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::borrow::Borrow;
use std::collections::{hash_map, HashMap};
use std::collections::hash_map::{IntoKeys, IntoValues};
use std::collections::{hash_map, HashMap};
use std::fmt::{self, Debug};
use std::hash::{BuildHasher, Hash};
use std::iter::FromIterator;
Expand All @@ -14,7 +14,6 @@ use serde::{
};

use crate::RandomState;
use crate::random_state::RandomSource;

/// A [`HashMap`](std::collections::HashMap) using [`RandomState`](crate::RandomState) to hash the items.
/// (Requires the `std` feature to be enabled.)
Expand Down
1 change: 0 additions & 1 deletion src/hash_set.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::RandomState;
use crate::random_state::RandomSource;
use std::collections::{hash_set, HashSet};
use std::fmt::{self, Debug};
use std::hash::{BuildHasher, Hash};
Expand Down
8 changes: 6 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
//! But this also means that different computers or computers using different versions of ahash may observe different
//! hash values for the same input.
#![cfg_attr(
all(feature = "std", any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng")),
all(
feature = "std",
any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng")
),
doc = r##"
# Basic Usage
AHash provides an implementation of the [Hasher] trait.
Expand Down Expand Up @@ -97,7 +100,8 @@ Note the import of [HashMapExt]. This is needed for the constructor.
#![cfg_attr(feature = "specialize", feature(min_specialization))]
#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))]
#![cfg_attr(feature = "stdsimd", feature(stdsimd))]

#![cfg_attr(feature = "vaes", feature(link_llvm_intrinsics))]
#![cfg_attr(feature = "vaes", feature(simd_ffi))]
#[macro_use]
mod convert;

Expand Down
18 changes: 8 additions & 10 deletions src/random_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@ impl fmt::Debug for RandomState {
}

impl RandomState {

/// Create a new `RandomState` `BuildHasher` using random keys.
///
/// Each instance will have a unique set of keys derived from [RandomSource].
Expand Down Expand Up @@ -316,8 +315,8 @@ impl RandomState {
/// Calculates the hash of a single value. This provides a more convenient (and faster) way to obtain a hash:
/// For example:
#[cfg_attr(
feature = "std",
doc = r##" # Examples
feature = "std",
doc = r##" # Examples
```
use std::hash::BuildHasher;
use ahash::RandomState;
Expand All @@ -329,8 +328,8 @@ impl RandomState {
)]
/// This is similar to:
#[cfg_attr(
feature = "std",
doc = r##" # Examples
feature = "std",
doc = r##" # Examples
```
use std::hash::{BuildHasher, Hash, Hasher};
use ahash::RandomState;
Expand Down Expand Up @@ -418,12 +417,11 @@ impl BuildHasher for RandomState {
AHasher::from_random_state(self)
}


/// Calculates the hash of a single value. This provides a more convenient (and faster) way to obtain a hash:
/// For example:
#[cfg_attr(
feature = "std",
doc = r##" # Examples
feature = "std",
doc = r##" # Examples
```
use std::hash::BuildHasher;
use ahash::RandomState;
Expand All @@ -435,8 +433,8 @@ impl BuildHasher for RandomState {
)]
/// This is similar to:
#[cfg_attr(
feature = "std",
doc = r##" # Examples
feature = "std",
doc = r##" # Examples
```
use std::hash::{BuildHasher, Hash, Hasher};
use ahash::RandomState;
Expand Down