Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zstd: Add noasm tag for xxhash #475

Merged
merged 1 commit into from Jan 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions zstd/internal/xxhash/xxhash_amd64.s
@@ -1,6 +1,7 @@
// +build !appengine
// +build gc
// +build !purego
// +build !noasm

#include "textflag.h"

Expand Down
81 changes: 39 additions & 42 deletions zstd/internal/xxhash/xxhash_arm64.s
@@ -1,13 +1,13 @@
// +build gc,!purego
// +build gc,!purego,!noasm

#include "textflag.h"

// Register allocation.
#define digest R1
#define h R2 // Return value.
#define p R3 // Input pointer.
#define h R2 // Return value.
#define p R3 // Input pointer.
#define len R4
#define nblocks R5 // len / 32.
#define nblocks R5 // len / 32.
#define prime1 R7
#define prime2 R8
#define prime3 R9
Expand All @@ -22,50 +22,48 @@
#define x3 R22
#define x4 R23

#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \

// x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \

#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \
#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \

// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \

#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \

// The primes are repeated here to ensure that they're stored
// in a contiguous array, so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40


// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
LDP b_base+0(FP), (p, len)
LDP b_base+0(FP), (p, len)

LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
Expand Down Expand Up @@ -156,32 +154,31 @@ try1:

end:
EOR h >> 33, h
MUL prime2, h
MUL prime2, h
EOR h >> 29, h
MUL prime3, h
MUL prime3, h
EOR h >> 32, h

MOVD h, ret+24(FP)
RET


// func writeBlocks(d *Digest, b []byte) int
//
// Assumes len(b) >= 32.
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
LDP primes<>(SB), (prime1, prime2)
LDP primes<>(SB), (prime1, prime2)

// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)

LDP b_base+8(FP), (p, len)

blocksLoop()

// Store updated state.
STP (v1, v2), 0(digest)
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)

BIC $31, len
Expand Down
3 changes: 2 additions & 1 deletion zstd/internal/xxhash/xxhash_asm.go
@@ -1,8 +1,9 @@
//go:build (amd64 || arm64) && !appengine && gc && !purego
//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
// +build !noasm

package xxhash

Expand Down
4 changes: 2 additions & 2 deletions zstd/internal/xxhash/xxhash_other.go
@@ -1,5 +1,5 @@
//go:build (!amd64 && !arm64) || appengine || !gc || purego
// +build !amd64,!arm64 appengine !gc purego
//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
// +build !amd64,!arm64 appengine !gc purego noasm

package xxhash

Expand Down