Skip to content

Commit

Permalink
get rid of loops in impl functions
Browse files (browse the repository at this point in the history)
  • Loading branch information
oconnor663 committed Jul 19, 2023
1 parent e56c6a8 commit e9643f4
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 38 deletions.
26 changes: 12 additions & 14 deletions rust/guts/src/avx512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,15 @@ unsafe extern "C" fn xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
mut counter: u64,
counter: u64,
flags: u32,
mut out: *mut u8,
mut out_len: usize,
out: *mut u8,
out_len: usize,
) {
while out_len >= 16 * BLOCK_LEN {
debug_assert!(out_len <= 16 * BLOCK_LEN);
if out_len == 16 * BLOCK_LEN {
blake3_guts_avx512_xof_16_exact(block, block_len, cv, counter, flags, out);
counter += 16;
out = out.add(16 * BLOCK_LEN);
out_len -= 16 * BLOCK_LEN;
return;
}
crate::xof_using_compress_xof(
blake3_guts_avx512_compress_xof,
Expand All @@ -149,16 +148,15 @@ unsafe extern "C" fn xof_xor(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
mut counter: u64,
counter: u64,
flags: u32,
mut out: *mut u8,
mut out_len: usize,
out: *mut u8,
out_len: usize,
) {
while out_len >= 16 * BLOCK_LEN {
debug_assert!(out_len <= 16 * BLOCK_LEN);
if out_len == 16 * BLOCK_LEN {
blake3_guts_avx512_xof_xor_16_exact(block, block_len, cv, counter, flags, out);
counter += 16;
out = out.add(16 * BLOCK_LEN);
out_len -= 16 * BLOCK_LEN;
return;
}
crate::xof_xor_using_compress_xof(
blake3_guts_avx512_compress_xof,
Expand Down
64 changes: 40 additions & 24 deletions rust/guts/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,20 +284,27 @@ impl Implementation {
block: &BlockBytes,
block_len: u32,
cv: &CVBytes,
counter: u64,
mut counter: u64,
flags: u32,
out: &mut [u8],
mut out: &mut [u8],
) {
unsafe {
self.xof_fn()(
block,
block_len,
cv,
counter,
flags | ROOT,
out.as_mut_ptr(),
out.len(),
);
let degree = self.degree();
let simd_len = degree * BLOCK_LEN;
while !out.is_empty() {
let take = cmp::min(simd_len, out.len());
unsafe {
self.xof_fn()(
block,
block_len,
cv,
counter,
flags | ROOT,
out.as_mut_ptr(),
take,
);
}
out = &mut out[take..];
counter += degree as u64;
}
}

Expand All @@ -312,20 +319,27 @@ impl Implementation {
block: &BlockBytes,
block_len: u32,
cv: &CVBytes,
counter: u64,
mut counter: u64,
flags: u32,
out: &mut [u8],
mut out: &mut [u8],
) {
unsafe {
self.xof_xor_fn()(
block,
block_len,
cv,
counter,
flags | ROOT,
out.as_mut_ptr(),
out.len(),
);
let degree = self.degree();
let simd_len = degree * BLOCK_LEN;
while !out.is_empty() {
let take = cmp::min(simd_len, out.len());
unsafe {
self.xof_xor_fn()(
block,
block_len,
cv,
counter,
flags | ROOT,
out.as_mut_ptr(),
take,
);
}
out = &mut out[take..];
counter += degree as u64;
}
}

Expand Down Expand Up @@ -608,6 +622,7 @@ unsafe fn xof_using_compress_xof(
mut out: *mut u8,
mut out_len: usize,
) {
debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
while out_len > 0 {
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
Expand All @@ -630,6 +645,7 @@ unsafe fn xof_xor_using_compress_xof(
mut out: *mut u8,
mut out_len: usize,
) {
debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
while out_len > 0 {
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
Expand Down

0 comments on commit e9643f4

Please sign in to comment.