Skip to content

Commit

Permalink
zstd: Add noasm tag for xxhash (#475)
Browse files Browse the repository at this point in the history
  • Loading branch information
klauspost committed Jan 19, 2022
1 parent 505dc06 commit 1d64431
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 45 deletions.
1 change: 1 addition & 0 deletions zstd/internal/xxhash/xxhash_amd64.s
@@ -1,6 +1,7 @@
// +build !appengine
// +build gc
// +build !purego
// +build !noasm

#include "textflag.h"

Expand Down
81 changes: 39 additions & 42 deletions zstd/internal/xxhash/xxhash_arm64.s
@@ -1,13 +1,13 @@
// +build gc,!purego
// +build gc,!purego,!noasm

#include "textflag.h"

// Register allocation.
#define digest R1
#define h R2 // Return value.
#define p R3 // Input pointer.
#define h R2 // Return value.
#define p R3 // Input pointer.
#define len R4
#define nblocks R5 // len / 32.
#define nblocks R5 // len / 32.
#define prime1 R7
#define prime2 R8
#define prime3 R9
Expand All @@ -22,50 +22,48 @@
#define x3 R22
#define x4 R23

#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \

// x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \

#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \
#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \

// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \

#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \

// The primes are repeated here to ensure that they're stored
// in a contiguous array, so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40


// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
LDP b_base+0(FP), (p, len)
LDP b_base+0(FP), (p, len)

LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
Expand Down Expand Up @@ -156,32 +154,31 @@ try1:

end:
EOR h >> 33, h
MUL prime2, h
MUL prime2, h
EOR h >> 29, h
MUL prime3, h
MUL prime3, h
EOR h >> 32, h

MOVD h, ret+24(FP)
RET


// func writeBlocks(d *Digest, b []byte) int
//
// Assumes len(b) >= 32.
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
LDP primes<>(SB), (prime1, prime2)
LDP primes<>(SB), (prime1, prime2)

// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)

LDP b_base+8(FP), (p, len)

blocksLoop()

// Store updated state.
STP (v1, v2), 0(digest)
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)

BIC $31, len
Expand Down
3 changes: 2 additions & 1 deletion zstd/internal/xxhash/xxhash_asm.go
@@ -1,8 +1,9 @@
//go:build (amd64 || arm64) && !appengine && gc && !purego
//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
// +build !noasm

package xxhash

Expand Down
4 changes: 2 additions & 2 deletions zstd/internal/xxhash/xxhash_other.go
@@ -1,5 +1,5 @@
//go:build (!amd64 && !arm64) || appengine || !gc || purego
// +build !amd64,!arm64 appengine !gc purego
//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
// +build !amd64,!arm64 appengine !gc purego noasm

package xxhash

Expand Down

0 comments on commit 1d64431

Please sign in to comment.