Skip to content

Commit

Permalink
retain the old NEON rotations in inline comments
Browse files Browse the repository at this point in the history
  • Loading branch information
oconnor663 committed Jul 5, 2023
1 parent 7038dad commit f7e1a74
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions c/blake3_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,22 @@ INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
}

INLINE uint32x4_t rot16_128(uint32x4_t x) {
  // Rotate each 32-bit lane right by 16 bits.
  //
  // The straightforward formulation is two shifts and an or:
  //   return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16));
  // but that was slower on the microarchitectures we've tested. See
  // https://github.com/BLAKE3-team/BLAKE3/pull/319.
  //
  // Instead, view the vector as 16-bit lanes and reverse the lanes within
  // each 32-bit word, which swaps the two halves of every word.
  const uint16x8_t halves = vreinterpretq_u16_u32(x);
  const uint16x8_t swapped = vrev32q_u16(halves);
  return vreinterpretq_u32_u16(swapped);
}

INLINE uint32x4_t rot12_128(uint32x4_t x) {
  // Rotate each 32-bit lane right by 12 bits.
  //
  // Reference formulation (two shifts and an or), kept for comparison; see
  // the comment in rot16_128 for why it is not used:
  //   return vorrq_u32(vshrq_n_u32(x, 12), vshlq_n_u32(x, 32 - 12));
  //
  // Shift left to form the high bits, then shift-right-and-insert x to fill
  // in the low bits.
  const uint32x4_t high_bits = vshlq_n_u32(x, 32 - 12);
  return vsriq_n_u32(high_bits, x, 12);
}

INLINE uint32x4_t rot8_128(uint32x4_t x) {
// See comment in rot16_128.
// return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8));
#if defined(__clang__)
return vreinterpretq_u32_u8(__builtin_shufflevector(vreinterpretq_u8_u32(x), vreinterpretq_u8_u32(x), 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12));
#elif __GNUC__ * 10000 + __GNUC_MINOR__ * 100 >=40700
Expand All @@ -55,6 +63,8 @@ INLINE uint32x4_t rot8_128(uint32x4_t x) {
}

INLINE uint32x4_t rot7_128(uint32x4_t x) {
  // Rotate each 32-bit lane right by 7 bits.
  //
  // Reference formulation (two shifts and an or), kept for comparison; see
  // the comment in rot16_128 for why it is not used:
  //   return vorrq_u32(vshrq_n_u32(x, 7), vshlq_n_u32(x, 32 - 7));
  //
  // Shift left to form the high bits, then shift-right-and-insert x to fill
  // in the low bits.
  const uint32x4_t high_bits = vshlq_n_u32(x, 32 - 7);
  return vsriq_n_u32(high_bits, x, 7);
}

Expand Down

0 comments on commit f7e1a74

Please sign in to comment.