From 4c1d9dab614af86ba4f3f3f1e23d0444684e3e6b Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Thu, 6 Jan 2022 19:27:43 +0000 Subject: [PATCH] sha2: fix AVX backend (#345) --- Cargo.lock | 2 +- sha2/CHANGELOG.md | 15 +++++++++-- sha2/Cargo.toml | 2 +- sha2/src/lib.rs | 2 +- sha2/src/sha512/x86.rs | 61 +++++++++++++++++++++++------------------- 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d96adb4..d15f22c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -212,7 +212,7 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.0" +version = "0.10.1" dependencies = [ "cfg-if", "cpufeatures", diff --git a/sha2/CHANGELOG.md b/sha2/CHANGELOG.md index b59a388b..1e52559e 100644 --- a/sha2/CHANGELOG.md +++ b/sha2/CHANGELOG.md @@ -5,14 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 0.10.0 (2021-12-07) +## 0.10.1 (2022-01-06) +### Fixed +- Bug in the AVX2 backend ([#345]) + +## 0.10.0 (2021-12-07) [YANKED] ### Changed - Update to `digest` v0.10 ([#217]) - Rename `Sha512Trunc224` and `Sha512Trunc256` to `Sha512_224` and `Sha512_256` respectively. ([#217]) [#217]: https://github.com/RustCrypto/hashes/pull/217 -## 0.9.8 (2021-09-09) +## 0.9.9 (2022-01-06) +### Fixed +- Backport [#345] bug fix for the AVX2 backend ([#346]) + +[#345]: https://github.com/RustCrypto/hashes/pull/345 +[#346]: https://github.com/RustCrypto/hashes/pull/346 + +## 0.9.8 (2021-09-09) [YANKED] ### Fixed - Bug in the AVX2 backend ([#314]) diff --git a/sha2/Cargo.toml b/sha2/Cargo.toml index 5a998403..8a0ed161 100644 --- a/sha2/Cargo.toml +++ b/sha2/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sha2" -version = "0.10.0" # Also update html_root_url in lib.rs when bumping this +version = "0.10.1" # Also update html_root_url in lib.rs when bumping this description = """ Pure Rust implementation of the SHA-2 hash function family including SHA-224, SHA-256, SHA-384, and SHA-512. diff --git a/sha2/src/lib.rs b/sha2/src/lib.rs index 651bc606..3bc09f2d 100644 --- a/sha2/src/lib.rs +++ b/sha2/src/lib.rs @@ -48,7 +48,7 @@ #![doc( html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg", - html_root_url = "https://docs.rs/sha2/0.10.0" + html_root_url = "https://docs.rs/sha2/0.10.1" )] #![warn(missing_docs, rust_2018_idioms)] diff --git a/sha2/src/sha512/x86.rs b/sha2/src/sha512/x86.rs index 5f555c21..bb790408 100644 --- a/sha2/src/sha512/x86.rs +++ b/sha2/src/sha512/x86.rs @@ -34,8 +34,8 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]] start_block += 1; } - let mut ms: MsgSchedule = Default::default(); - let mut t2: RoundStates = [0u64; SHA512_ROUNDS_NUM]; + let mut ms: MsgSchedule = [_mm_setzero_si128(); 8]; + let mut t2: RoundStates = [_mm_setzero_si128(); 40]; let mut x = [_mm256_setzero_si256(); 8]; for i in (start_block..blocks.len()).step_by(2) { @@ -56,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]] #[inline(always)] unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) { - let mut ms = Default::default(); + let mut ms = [_mm_setzero_si128(); 8]; let mut x = [_mm_setzero_si128(); 8]; // Reduced to single iteration @@ -82,7 +82,7 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const _mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _), ); - _mm_store_si128(&mut ms[2 * $i] as *mut u64 as *mut _, y); + ms[$i] = y; )*}; } @@ -114,14 +114,8 @@ unsafe fn load_data_avx2( let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _); let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t)); - _mm_store_si128( - &mut ms[2 * $i] as *mut u64 as *mut _, - _mm256_extracti128_si256(y, 0), - ); - _mm_store_si128( - &mut t2[2 * $i] as *mut u64 as *mut _, - _mm256_extracti128_si256(y, 1), - ); + ms[$i] = _mm256_extracti128_si256(y, 0); + t2[$i] = _mm256_extracti128_si256(y, 1); )*}; } @@ -137,10 +131,13 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: & let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _); let y = sha512_update_x_avx(x, k64); - sha_round(current_state, ms[2 * j]); - sha_round(current_state, ms[2 * j + 1]); + { + let ms = cast_ms(ms); + sha_round(current_state, ms[2 * j]); + sha_round(current_state, ms[2 * j + 1]); + } - _mm_store_si128(&mut ms[2 * j] as *const u64 as *mut _, y); + ms[j] = y; k64_idx += 2; } } @@ -160,17 +157,14 @@ unsafe fn rounds_0_63_avx2( let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _); let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t)); - sha_round(current_state, ms[2 * j]); - sha_round(current_state, ms[2 * j + 1]); + { + let ms = cast_ms(ms); + sha_round(current_state, ms[2 * j]); + sha_round(current_state, ms[2 * j + 1]); + } - _mm_store_si128( - &mut ms[2 * j] as *mut u64 as *mut _, - _mm256_extracti128_si256(y, 0), - ); - _mm_store_si128( - &mut t2[(16 * i) + 2 * j] as *mut u64 as *mut _, - _mm256_extracti128_si256(y, 1), - ); + ms[j] = _mm256_extracti128_si256(y, 0); + t2[8 * i + j] = _mm256_extracti128_si256(y, 1); k64x4_idx += 2; } @@ -179,6 +173,7 @@ unsafe fn rounds_0_63_avx2( #[inline(always)] fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) { + let ms = cast_ms(ms); for i in 64..80 { sha_round(current_state, ms[i & 0xf]); } @@ -186,7 +181,7 @@ fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) { #[inline(always)] fn process_second_block(current_state: &mut State, t2: &RoundStates) { - for t2 in t2.iter() { + for t2 in cast_rs(t2).iter() { sha_round(current_state, *t2); } } @@ -341,9 +336,19 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, { XOR = _mm256_xor_si256, }); +#[inline(always)] +fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] { + unsafe { &*(ms as *const MsgSchedule as *const _) } +} + +#[inline(always)] +fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] { + unsafe { &*(rs as *const RoundStates as *const _) } +} + type State = [u64; SHA512_HASH_WORDS_NUM]; -type MsgSchedule = [u64; SHA512_BLOCK_WORDS_NUM]; -type RoundStates = [u64; SHA512_ROUNDS_NUM]; +type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2]; +type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2]; const SHA512_BLOCK_BYTE_LEN: usize = 128; const SHA512_ROUNDS_NUM: usize = 80;