diff --git a/zstd/enc_better.go b/zstd/enc_better.go index c769f6941d..d70e3fd3d3 100644 --- a/zstd/enc_better.go +++ b/zstd/enc_better.go @@ -416,15 +416,23 @@ encodeLoop: // Try to find a better match by searching for a long match at the end of the current best match if s+matched < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is around 3 bytes, but depends on input. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 3 + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) - cv := load3232(src, s) + s2 := s + skipBeginning + cv := load3232(src, s2) candidateL := e.longTable[nextHashL] - coffsetL := candidateL.offset - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + coffsetL := candidateL.offset - e.cur - matched + skipBeginning + if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 if matchedNext > matched { t = coffsetL + s = s2 matched = matchedNext if debugMatches { println("long match at end-of-match") @@ -434,12 +442,13 @@ encodeLoop: // Check prev long... if true { - coffsetL = candidateL.prev - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + coffsetL = candidateL.prev - e.cur - matched + skipBeginning + if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 if matchedNext > matched { t = coffsetL + s = s2 matched = matchedNext if debugMatches { println("prev long match at end-of-match")