Skip to content

Commit

Permalink
zstd: Improve "best" end search (#693)
Browse files Browse the repository at this point in the history
Check "best" end-of-match offset further forward.

Missed bytes are picked up later.

nyc-taxi-data: 474949772 -> 469436962
silesia: 60073508 -> 59993313
  • Loading branch information
klauspost committed Nov 18, 2022
1 parent 6f95269 commit c1e79a0
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions zstd/enc_best.go
Expand Up
@@ -278,10 +278,13 @@ encodeLoop:
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
- if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
- bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
- if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
- bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+ // Start check at a fixed offset to allow for a few mismatches.
+ // For this compression level 2 yields the best results.
+ const skipBeginning = 2
+ if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
+ bestEnd := bestOf(best, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
+ if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
+ bestEnd = bestOf(bestEnd, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
}
best = bestEnd
}
Expand Down

0 comments on commit c1e79a0

Please sign in to comment.