Skip to content

Commit

Permalink
zstd: Improve "better" compresssion (#657)
Browse files Browse the repository at this point in the history
Skip 3 bytes when doing end-of-match search.

nyc: 529329260 -> 517666647 bytes

This allows better matches to be picked up further forward, adding up to 3 literals, but giving a better match.

Performance unchanged.
  • Loading branch information
klauspost committed Sep 7, 2022
1 parent b95ee96 commit a63f67b
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions zstd/enc_better.go
Expand Up @@ -416,15 +416,23 @@ encodeLoop:

// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is around 3 bytes, but depends on input.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 3

nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
s2 := s + skipBeginning
cv := load3232(src, s2)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
coffsetL := candidateL.offset - e.cur - matched + skipBeginning
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
s = s2
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
Expand All @@ -434,12 +442,13 @@ encodeLoop:

// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
coffsetL = candidateL.prev - e.cur - matched + skipBeginning
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
s = s2
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
Expand Down

0 comments on commit a63f67b

Please sign in to comment.