Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sha2: fix AVX backend #345

Merged
merged 6 commits into from Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions sha2/CHANGELOG.md
Expand Up @@ -5,14 +5,25 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.10.0 (2021-12-07)
## 0.10.1 (2022-01-06)
### Fixed
- Bug in the AVX2 backend ([#345])

## 0.10.0 (2021-12-07) [YANKED]
### Changed
- Update to `digest` v0.10 ([#217])
- Rename `Sha512Trunc224` and `Sha512Trunc256` to `Sha512_224` and `Sha512_256` respectively. ([#217])

[#217]: https://github.com/RustCrypto/hashes/pull/217

## 0.9.8 (2021-09-09)
## 0.9.9 (2022-01-06)
### Fixed
- Backport [#345] bug fix for the AVX2 backend ([#346])

[#345]: https://github.com/RustCrypto/hashes/pull/345
[#346]: https://github.com/RustCrypto/hashes/pull/346

## 0.9.8 (2021-09-09) [YANKED]
### Fixed
- Bug in the AVX2 backend ([#314])

Expand Down
2 changes: 1 addition & 1 deletion sha2/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "sha2"
version = "0.10.0" # Also update html_root_url in lib.rs when bumping this
version = "0.10.1" # Also update html_root_url in lib.rs when bumping this
description = """
Pure Rust implementation of the SHA-2 hash function family
including SHA-224, SHA-256, SHA-384, and SHA-512.
Expand Down
2 changes: 1 addition & 1 deletion sha2/src/lib.rs
Expand Up @@ -48,7 +48,7 @@
#![doc(
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_root_url = "https://docs.rs/sha2/0.10.0"
html_root_url = "https://docs.rs/sha2/0.10.1"
)]
#![warn(missing_docs, rust_2018_idioms)]

Expand Down
61 changes: 33 additions & 28 deletions sha2/src/sha512/x86.rs
Expand Up @@ -34,8 +34,8 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
start_block += 1;
}

let mut ms: MsgSchedule = Default::default();
let mut t2: RoundStates = [0u64; SHA512_ROUNDS_NUM];
let mut ms: MsgSchedule = [_mm_setzero_si128(); 8];
let mut t2: RoundStates = [_mm_setzero_si128(); 40];
let mut x = [_mm256_setzero_si256(); 8];

for i in (start_block..blocks.len()).step_by(2) {
Expand All @@ -56,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]

#[inline(always)]
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
let mut ms = Default::default();
let mut ms = [_mm_setzero_si128(); 8];
let mut x = [_mm_setzero_si128(); 8];

// Reduced to single iteration
Expand All @@ -82,7 +82,7 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const
_mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
);

_mm_store_si128(&mut ms[2 * $i] as *mut u64 as *mut _, y);
ms[$i] = y;
)*};
}

Expand Down Expand Up @@ -114,14 +114,8 @@ unsafe fn load_data_avx2(
let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));

_mm_store_si128(
&mut ms[2 * $i] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 0),
);
_mm_store_si128(
&mut t2[2 * $i] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 1),
);
ms[$i] = _mm256_extracti128_si256(y, 0);
t2[$i] = _mm256_extracti128_si256(y, 1);
)*};
}

Expand All @@ -137,10 +131,13 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &
let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
let y = sha512_update_x_avx(x, k64);

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
{
let ms = cast_ms(ms);
sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
}

_mm_store_si128(&mut ms[2 * j] as *const u64 as *mut _, y);
ms[j] = y;
k64_idx += 2;
}
}
Expand All @@ -160,17 +157,14 @@ unsafe fn rounds_0_63_avx2(
let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));

sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
{
let ms = cast_ms(ms);
sha_round(current_state, ms[2 * j]);
sha_round(current_state, ms[2 * j + 1]);
}

_mm_store_si128(
&mut ms[2 * j] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 0),
);
_mm_store_si128(
&mut t2[(16 * i) + 2 * j] as *mut u64 as *mut _,
_mm256_extracti128_si256(y, 1),
);
ms[j] = _mm256_extracti128_si256(y, 0);
t2[8 * i + j] = _mm256_extracti128_si256(y, 1);

k64x4_idx += 2;
}
Expand All @@ -179,14 +173,15 @@ unsafe fn rounds_0_63_avx2(

#[inline(always)]
fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
let ms = cast_ms(ms);
for i in 64..80 {
sha_round(current_state, ms[i & 0xf]);
}
}

#[inline(always)]
fn process_second_block(current_state: &mut State, t2: &RoundStates) {
for t2 in t2.iter() {
for t2 in cast_rs(t2).iter() {
sha_round(current_state, *t2);
}
}
Expand Down Expand Up @@ -341,9 +336,19 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
XOR = _mm256_xor_si256,
});

#[inline(always)]
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
unsafe { &*(ms as *const MsgSchedule as *const _) }
}

#[inline(always)]
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
unsafe { &*(rs as *const RoundStates as *const _) }
}

type State = [u64; SHA512_HASH_WORDS_NUM];
type MsgSchedule = [u64; SHA512_BLOCK_WORDS_NUM];
type RoundStates = [u64; SHA512_ROUNDS_NUM];
type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2];
type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2];

const SHA512_BLOCK_BYTE_LEN: usize = 128;
const SHA512_ROUNDS_NUM: usize = 80;
Expand Down