Skip to content

Commit

Permalink
sha2: fix AVX backend (#345)
Browse files Browse the repository at this point in the history
  • Loading branch information
newpavlov committed Jan 6, 2022
1 parent c478cbb commit 4c1d9da
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions sha2/CHANGELOG.md
Expand Up @@ -5,14 +5,25 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.10.0 (2021-12-07)
## 0.10.1 (2022-01-06)
### Fixed
- Bug in the AVX2 backend ([#345])

## 0.10.0 (2021-12-07) [YANKED]
### Changed
- Update to `digest` v0.10 ([#217])
- Rename `Sha512Trunc224` and `Sha512Trunc256` to `Sha512_224` and `Sha512_256` respectively. ([#217])

[#217]: https://github.com/RustCrypto/hashes/pull/217

## 0.9.8 (2021-09-09)
## 0.9.9 (2022-01-06)
### Fixed
- Backport [#345] bug fix for the AVX2 backend ([#346])

[#345]: https://github.com/RustCrypto/hashes/pull/345
[#346]: https://github.com/RustCrypto/hashes/pull/346

## 0.9.8 (2021-09-09) [YANKED]
### Fixed
- Bug in the AVX2 backend ([#314])

Expand Down
2 changes: 1 addition & 1 deletion sha2/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "sha2"
version = "0.10.0" # Also update html_root_url in lib.rs when bumping this
version = "0.10.1" # Also update html_root_url in lib.rs when bumping this
description = """
Pure Rust implementation of the SHA-2 hash function family
including SHA-224, SHA-256, SHA-384, and SHA-512.
Expand Down
2 changes: 1 addition & 1 deletion sha2/src/lib.rs
Expand Up @@ -48,7 +48,7 @@
#![doc(
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_root_url = "https://docs.rs/sha2/0.10.0"
html_root_url = "https://docs.rs/sha2/0.10.1"
)]
#![warn(missing_docs, rust_2018_idioms)]

Expand Down
61 changes: 33 additions & 28 deletions sha2/src/sha512/x86.rs
Expand Up @@ -34,8 +34,8 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
start_block += 1;
}

let mut ms: MsgSchedule = Default::default();
let mut t2: RoundStates = [0u64; SHA512_ROUNDS_NUM];
let mut ms: MsgSchedule = [_mm_setzero_si128(); 8];
let mut t2: RoundStates = [_mm_setzero_si128(); 40];
let mut x = [_mm256_setzero_si256(); 8];

for i in (start_block..blocks.len()).step_by(2) {
Expand All @@ -56,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]

#[inline(always)]
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
let mut ms = Default::default();
let mut ms = [_mm_setzero_si128(); 8];
let mut x = [_mm_setzero_si128(); 8];

// Reduced to single iteration
Expand All @@ -82,7 +82,7 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const
_mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
);

_mm_store_si128(&mut ms[2 * $i] as *mut u64 as *mut _, y);
ms[$i] = y;
)*};
}

Expand Down Expand Up @@ -114,14 +114,8 @@ unsafe fn load_data_avx2(
let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));

_mm_store_si128(
&mut ms[2 * $i] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 0),
);
_mm_store_si128(
&mut t2[2 * $i] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 1),
);
ms[$i] = _mm256_extracti128_si256(y, 0);
t2[$i] = _mm256_extracti128_si256(y, 1);
)*};
}

Expand All @@ -137,10 +131,13 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &
let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
let y = sha512_update_x_avx(x, k64);

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
{
let ms = cast_ms(ms);
sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
}

_mm_store_si128(&mut ms[2 * j] as *const u64 as *mut _, y);
ms[j] = y;
k64_idx += 2;
}
}
Expand All @@ -160,17 +157,14 @@ unsafe fn rounds_0_63_avx2(
let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
{
let ms = cast_ms(ms);
sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
}

_mm_store_si128(
&mut ms[2 * j] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 0),
);
_mm_store_si128(
&mut t2[(16 * i) + 2 * j] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 1),
);
ms[j] = _mm256_extracti128_si256(y, 0);
t2[8 * i + j] = _mm256_extracti128_si256(y, 1);

k64x4_idx += 2;
}
Expand All @@ -179,14 +173,15 @@ unsafe fn rounds_0_63_avx2(

#[inline(always)]
fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
let ms = cast_ms(ms);
for i in 64..80 {
sha_round(current_state, ms[i & 0xf]);
}
}

#[inline(always)]
fn process_second_block(current_state: &mut State, t2: &RoundStates) {
for t2 in t2.iter() {
for t2 in cast_rs(t2).iter() {
sha_round(current_state, *t2);
}
}
Expand Down Expand Up @@ -341,9 +336,19 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
XOR = _mm256_xor_si256,
});

#[inline(always)]
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
unsafe { &*(ms as *const MsgSchedule as *const _) }
}

#[inline(always)]
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
unsafe { &*(rs as *const RoundStates as *const _) }
}

type State = [u64; SHA512_HASH_WORDS_NUM];
type MsgSchedule = [u64; SHA512_BLOCK_WORDS_NUM];
type RoundStates = [u64; SHA512_ROUNDS_NUM];
type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2];
type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2];

const SHA512_BLOCK_BYTE_LEN: usize = 128;
const SHA512_ROUNDS_NUM: usize = 80;
Expand Down

0 comments on commit 4c1d9da

Please sign in to comment.