Skip to content

Commit

Permalink
s2: Improve "best" compression (#658)
Browse files Browse the repository at this point in the history
Allow skipping bytes at the beginning of the match. Since we already have pretty good matches, we aren't too likely to find better ones.

Improvement is in the 0.0x% to 0.10% range.
  • Loading branch information
klauspost committed Sep 8, 2022
1 parent a63f67b commit 463e7df
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions s2/encode_best.go
Expand Up @@ -177,21 +177,38 @@ func encodeBlockBest(dst, src []byte) (d int) {
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
}
// Search for a match at best match end, see if that is better.
if sAt := best.s + best.length; sAt < sLimit {
sBack := best.s
backL := best.length
// Allow some bytes at the beginning to mismatch.
// Sweet spot is around 1-2 bytes, but depends on input.
// The skipped bytes are tested when extending backwards,
// and are still picked up as part of the match if they match.
const skipBeginning = 2
const skipEnd = 1
if sAt := best.s + best.length - skipEnd; sAt < sLimit {

sBack := best.s + skipBeginning - skipEnd
backL := best.length - skipBeginning
// Load initial values
cv = load64(src, sBack)
// Search for mismatch

// Grab candidates...
next := lTable[hash8(load64(src, sAt), lTableBits)]
//next := sTable[hash4(load64(src, sAt), sTableBits)]

if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
// Disabled: Extremely small gain
if false {
next = sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
}
}
}
}
Expand Down

0 comments on commit 463e7df

Please sign in to comment.