
zstd: Use individual reset threshold (#703)
* zstd: Use individual reset threshold

Instead of deriving the reset threshold from the largest possible window size, derive it from the window size chosen for the encoder.

```
λ benchcmp before.txt after.txt
benchmark                                         old ns/op     new ns/op     delta
BenchmarkEncoder_EncodeAllSimple4K/fastest-32     3145          3133          -0.38%
BenchmarkEncoder_EncodeAllSimple4K/default-32     41485         40624         -2.08%
BenchmarkEncoder_EncodeAllSimple4K/better-32      49352         49197         -0.31%
BenchmarkEncoder_EncodeAllSimple4K/best-32        421522        407392        -3.35%

benchmark                                         old MB/s     new MB/s     speedup
BenchmarkEncoder_EncodeAllSimple4K/fastest-32     1302.48      1307.39      1.00x
BenchmarkEncoder_EncodeAllSimple4K/default-32     98.74        100.83       1.02x
BenchmarkEncoder_EncodeAllSimple4K/better-32      83.00        83.26        1.00x
BenchmarkEncoder_EncodeAllSimple4K/best-32        9.72         10.05        1.03x
```
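In short, the threshold moves from a single package-level constant sized for `MaxWindowSize` to a per-encoder `bufferReset` field sized for the configured window. A minimal standalone sketch of the before/after computation (the `1 << 29` stand-in for `MaxWindowSize` and the 4 MB example window are illustrative values, not taken from the package):

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Illustrative stand-in for the package's MaxWindowSize constant.
	const maxWindowSize = 1 << 29

	// Before: one shared threshold, sized for the largest possible window.
	oldReset := int32(math.MaxInt32 - maxWindowSize)

	// After: each encoder derives its own threshold from its chosen window,
	// mirroring bufferReset: math.MaxInt32 - int32(o.windowSize*2) in the diff below.
	windowSize := 4 << 20 // e.g. an encoder configured with a 4 MB window
	newReset := int32(math.MaxInt32 - windowSize*2)

	fmt.Println("old reset threshold:", oldReset)
	fmt.Println("new reset threshold:", newReset) // more headroom before offsets are rewound
}
```

The smaller the configured window, the more offset headroom an encoder has before its history and tables must be purged, which is where the small gains in the benchmarks above come from.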
klauspost committed Nov 30, 2022
1 parent d3349be commit dfaad36
Showing 8 changed files with 25 additions and 26 deletions.
7 changes: 4 additions & 3 deletions zstd/enc_base.go
@@ -16,6 +16,7 @@ type fastBase struct {
cur int32
// maximum offset. Should be at least 2x block size.
maxMatchOff int32
+ bufferReset int32
hist []byte
crc *xxhash.Digest
tmp [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
}

func (e *fastBase) addBlock(src []byte) int32 {
- if debugAsserts && e.cur > bufferReset {
- panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+ if debugAsserts && e.cur > e.bufferReset {
+ panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
}
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
@@ -154,7 +155,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {

// We offset current position so everything will be out of reach.
// If above reset line, history will be purged.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += e.maxMatchOff + int32(len(e.hist))
}
e.hist = e.hist[:0]
2 changes: 1 addition & 1 deletion zstd/enc_best.go
@@ -85,7 +85,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
e.table = [bestShortTableSize]prevEntry{}
e.longTable = [bestLongTableSize]prevEntry{}
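The wraparound guard in each encoder also changes shape: it now subtracts the current history length from the threshold. A hypothetical helper expressing that reading of the condition (a simplified sketch, not the package's exact bookkeeping; `needsOffsetReset` and the toy threshold are invented for illustration):

```go
package main

import "fmt"

// needsOffsetReset mirrors the new loop condition: table entries hold offsets
// of the form cur + i for indexes i into the history buffer, so the guard has
// to fire before cur+len(hist) can reach the threshold, not merely before cur
// itself does.
func needsOffsetReset(cur, threshold int32, histLen int) bool {
	return cur >= threshold-int32(histLen)
}

func main() {
	const threshold = int32(1 << 20) // toy value for illustration
	fmt.Println(needsOffsetReset(threshold-100, threshold, 50))  // false: cur+hist stays below the threshold
	fmt.Println(needsOffsetReset(threshold-100, threshold, 200)) // true: cur+hist would cross it
}
```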
4 changes: 2 additions & 2 deletions zstd/enc_better.go
@@ -62,7 +62,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
e.table = [betterShortTableSize]tableEntry{}
e.longTable = [betterLongTableSize]prevEntry{}
@@ -583,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
8 changes: 4 additions & 4 deletions zstd/enc_dfast.go
@@ -44,7 +44,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
e.table = [dFastShortTableSize]tableEntry{}
e.longTable = [dFastLongTableSize]tableEntry{}
@@ -384,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -681,7 +681,7 @@ encodeLoop:
}

// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -696,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
8 changes: 4 additions & 4 deletions zstd/enc_fast.go
@@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
)

// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -310,7 +310,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
}

// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -538,7 +538,7 @@ encodeLoop:
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -555,7 +555,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
return
}
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
e.table = [tableSize]tableEntry{}
e.cur = e.maxMatchOff
15 changes: 8 additions & 7 deletions zstd/encoder_options.go
@@ -3,6 +3,7 @@ package zstd
import (
"errors"
"fmt"
"math"
"runtime"
"strings"
)
@@ -47,22 +48,22 @@ func (o encoderOptions) encoder() encoder {
switch o.level {
case SpeedFastest:
if o.dict != nil {
- return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}

case SpeedDefault:
if o.dict != nil {
- return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
}
- return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
case SpeedBetterCompression:
if o.dict != nil {
- return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
case SpeedBestCompression:
- return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
4 changes: 2 additions & 2 deletions zstd/fuzz_test.go
@@ -181,8 +181,8 @@ func FuzzEncoding(f *testing.F) {
// Just test if we crash...
defer func() {
if r := recover(); r != nil {
- rdebug.PrintStack()
- t.Fatal(r)
+ stack := rdebug.Stack()
+ t.Fatalf("%v:\n%v", r, string(stack))
}
}()
if len(data) > maxSize {
3 changes: 0 additions & 3 deletions zstd/zstd.go
@@ -36,9 +36,6 @@ const forcePreDef = false
// zstdMinMatch is the minimum zstd match length.
const zstdMinMatch = 3

- // Reset the buffer offset when reaching this.
- const bufferReset = math.MaxInt32 - MaxWindowSize

// fcsUnknown is used for unknown frame content size.
const fcsUnknown = math.MaxUint64

