Skip to content

Commit

Permalink
Prevent perf regression on Aarch64 (#228)
Browse files Browse the repository at this point in the history
* Add mixcolumns step accidentally removed earlier.

Signed-off-by: Tom Kaitchuck <Tom.Kaitchuck@gmail.com>
  • Loading branch information
tkaitchuck committed Mar 27, 2024
1 parent 0a0e493 commit cd5000a
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 37 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/rust.yml
Expand Up @@ -64,7 +64,7 @@ jobs:
- run: cargo +1.72.0 check --target armv7-unknown-linux-gnueabihf
aarch64-apple-darwin:
name: Aarch64 Apple Darwin
runs-on: macos-latest
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@master
Expand All @@ -80,6 +80,13 @@ jobs:
toolchain: 1.72.0
targets: aarch64-apple-darwin
- run: cargo +1.72.0 check --target aarch64-apple-darwin
# aarch64-debug:
# name: Debug Apple
# runs-on: macos-14
# steps:
# - uses: actions/checkout@v2
# - name: Setup upterm session
# uses: lhotari/action-upterm@v1
i686-unknown-linux-gnu:
name: Linux i686
runs-on: ubuntu-latest
Expand Down
8 changes: 0 additions & 8 deletions src/aes_hash.rs
Expand Up @@ -101,16 +101,8 @@ impl AHasher {
let result: [u64; 2] = aesdec(combined, combined).convert();
result[0]
}

#[inline]
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
fn final_mix(&self) -> u128 {
let sum = aesenc(self.sum, self.key);
aesdec(aesdec(sum, self.enc), sum)
}

#[inline]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn final_mix(&self) -> u128 {
let combined = aesenc(self.sum, self.enc);
aesdec(aesdec(combined, self.key), combined)
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Expand Up @@ -251,8 +251,8 @@ impl Default for AHasher {
// #[inline(never)]
// #[doc(hidden)]
// pub fn hash_test(input: &[u8]) -> u64 {
// let a = RandomState::with_seeds(11, 22, 33, 44);
// <[u8]>::get_hash(input, &a)
// let a = RandomState::<&[u8]>::with_seeds(11, 22, 33, 44);
// a.hash_one(input)
// }

#[cfg(feature = "std")]
Expand Down
57 changes: 31 additions & 26 deletions src/operations.rs
Expand Up @@ -7,7 +7,7 @@ pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties found by automated search.
/// See the unit tests below. (Below are alternative values)
#[cfg(all(target_feature = "ssse3", not(miri)))]
#[allow(dead_code)]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;
Expand Down Expand Up @@ -51,17 +51,19 @@ pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {

#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
#[cfg(all(target_feature = "ssse3", not(miri)))]
{
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
}
#[cfg(not(all(target_feature = "ssse3", not(miri))))]
{
a.swap_bytes()
cfg_if::cfg_if! {
if #[cfg(all(target_feature = "ssse3", not(miri)))] {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
} else if #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))] {
use core::arch::aarch64::vqtbl1q_s8;
unsafe { transmute!(vqtbl1q_s8(transmute!(a), transmute!(SHUFFLE_MASK))) }
} else {
a.swap_bytes()
}
}
}

Expand All @@ -79,22 +81,25 @@ pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
add_by_64s(shuffled, to_add.convert()).convert()
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
cfg_if::cfg_if! {
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))] {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
}
} else if #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))] {
use core::arch::aarch64::vaddq_u64;
unsafe { transmute!(vaddq_u64(transmute!(a), transmute!(b))) }
} else {
[a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}
}
}

#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
[a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
Expand Down Expand Up @@ -122,7 +127,7 @@ pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
unsafe { transmute!(vaeseq_u8(transmute!(value), transmute!(xor))) }
unsafe { transmute!(vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(xor)))) }
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
Expand Down Expand Up @@ -150,7 +155,7 @@ pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
unsafe { transmute!(vaesdq_u8(transmute!(value), transmute!(xor))) }
unsafe { transmute!(vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(xor)))) }
}

#[allow(unused)]
Expand Down

0 comments on commit cd5000a

Please sign in to comment.