Skip to content

Commit

Permalink
sha2: Reduce memory pressure
Browse files Browse the repository at this point in the history
  • Loading branch information
Rexagon committed Sep 8, 2021
1 parent b2f6d86 commit 03a3ebd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 66 deletions.
50 changes: 0 additions & 50 deletions sha2/src/consts.rs
Expand Up @@ -163,56 +163,6 @@ pub const K64X2: [[u64; 2]; 40] = [
[K64[79], K64[78]],
];

macro_rules! dup_array {
([$([$a:expr, $b:expr]),*,]) => {[
$($b, $a, $b, $a),*,
]}
}

/// Constants necessary for SHA-512 family of digests.
pub const K64X4: [u64; 160] = dup_array!([
[K64[1], K64[0]],
[K64[3], K64[2]],
[K64[5], K64[4]],
[K64[7], K64[6]],
[K64[9], K64[8]],
[K64[11], K64[10]],
[K64[13], K64[12]],
[K64[15], K64[14]],
[K64[17], K64[16]],
[K64[19], K64[18]],
[K64[21], K64[20]],
[K64[23], K64[22]],
[K64[25], K64[24]],
[K64[27], K64[26]],
[K64[29], K64[28]],
[K64[31], K64[30]],
[K64[33], K64[32]],
[K64[35], K64[34]],
[K64[37], K64[36]],
[K64[39], K64[38]],
[K64[41], K64[40]],
[K64[43], K64[42]],
[K64[45], K64[44]],
[K64[47], K64[46]],
[K64[49], K64[48]],
[K64[51], K64[50]],
[K64[53], K64[52]],
[K64[55], K64[54]],
[K64[57], K64[56]],
[K64[59], K64[58]],
[K64[61], K64[60]],
[K64[63], K64[62]],
[K64[65], K64[64]],
[K64[67], K64[66]],
[K64[69], K64[68]],
[K64[71], K64[70]],
[K64[73], K64[72]],
[K64[75], K64[74]],
[K64[77], K64[76]],
[K64[79], K64[78]],
]);

pub static H224: [u32; STATE_LEN] = [
0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
];
Expand Down
30 changes: 14 additions & 16 deletions sha2/src/sha512/x86.rs
Expand Up @@ -9,9 +9,9 @@ use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use crate::consts::{K64, K64X4};
use crate::consts::K64;

cpufeatures::new!(avx2_cpuid, "avx", "avx2", "sse2", "sse3");
cpufeatures::new!(avx2_cpuid, "avx2");

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
Expand All @@ -25,7 +25,7 @@ pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
}
}

#[target_feature(enable = "avx,avx2,sse2,sse3")]
#[target_feature(enable = "avx2")]
unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
let mut start_block = 0;

Expand Down Expand Up @@ -110,10 +110,9 @@ unsafe fn load_data_avx2(
x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i + 1) as *const _), 0);

x[$i] = _mm256_shuffle_epi8(x[$i], MASK);
let y = _mm256_add_epi64(
x[$i],
_mm256_loadu_si256(&K64X4[4 * $i] as *const u64 as *const _),
);

let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));

_mm_store_si128(
&mut ms[2 * $i] as *mut u64 as *mut _,
Expand All @@ -135,7 +134,8 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &

for _ in 0..4 {
for j in 0..8 {
let y = sha512_update_x_avx(x, &K64[k64_idx] as *const u64 as *const _);
let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
let y = sha512_update_x_avx(x, k64);

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
Expand All @@ -153,11 +153,12 @@ unsafe fn rounds_0_63_avx2(
ms: &mut MsgSchedule,
t2: &mut RoundStates,
) {
let mut k64x4_idx: usize = 2 * SHA512_BLOCK_WORDS_NUM;
let mut k64x4_idx: usize = SHA512_BLOCK_WORDS_NUM;

for i in 1..5 {
for j in 0..8 {
let y = sha512_update_x_avx2(x, &K64X4[k64x4_idx] as *const u64 as *const _);
let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
Expand All @@ -171,7 +172,7 @@ unsafe fn rounds_0_63_avx2(
_mm256_extracti128_si256(y, 1),
);

k64x4_idx += 4;
k64x4_idx += 2;
}
}
}
Expand Down Expand Up @@ -249,14 +250,13 @@ unsafe fn accumulate_state(dst: &mut State, src: &State) {

macro_rules! fn_sha512_update_x {
($name:ident, $ty:ident, {
LOAD = $LOAD:ident,
ADD64 = $ADD64:ident,
ALIGNR8 = $ALIGNR8:ident,
SRL64 = $SRL64:ident,
SLL64 = $SLL64:ident,
XOR = $XOR:ident,
}) => {
unsafe fn $name(x: &mut [$ty; 8], k64_p: *const $ty) -> $ty {
unsafe fn $name(x: &mut [$ty; 8], k64: $ty) -> $ty {
// q[2:1]
let mut t0 = $ALIGNR8(x[1], x[0], 8);
// q[10:9]
Expand Down Expand Up @@ -320,13 +320,12 @@ macro_rules! fn_sha512_update_x {
x[6] = x[7];
x[7] = temp;

$ADD64(x[7], $LOAD(k64_p))
$ADD64(x[7], k64)
}
};
}

fn_sha512_update_x!(sha512_update_x_avx, __m128i, {
LOAD = _mm_loadu_si128,
ADD64 = _mm_add_epi64,
ALIGNR8 = _mm_alignr_epi8,
SRL64 = _mm_srli_epi64,
Expand All @@ -335,7 +334,6 @@ fn_sha512_update_x!(sha512_update_x_avx, __m128i, {
});

fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
LOAD = _mm256_loadu_si256,
ADD64 = _mm256_add_epi64,
ALIGNR8 = _mm256_alignr_epi8,
SRL64 = _mm256_srli_epi64,
Expand Down

0 comments on commit 03a3ebd

Please sign in to comment.