Skip to content

Commit

Permalink
zstd: Select best match using selection trees (#706)
Browse files: browse the repository at this point in the history
* zstd: Select best match using selection trees

name                              old speed      new speed      delta
Encoder_EncodeAllSimple/best-8    12.2MB/s ± 1%  13.5MB/s ± 3%  +10.55%  (p=0.000 n=20+19)
Encoder_EncodeAllSimple4K/best-8  10.5MB/s ± 1%  11.9MB/s ± 1%  +13.52%  (p=0.000 n=20+19)

name                              old alloc/op   new alloc/op   delta
Encoder_EncodeAllSimple/best-8       18.0B ± 0%     16.0B ± 0%  -11.11%  (p=0.000 n=18+17)
Encoder_EncodeAllSimple4K/best-8     1.00B ± 0%     1.00B ± 0%     ~     (all equal)

* zstd: Track best match in best encoder by pointer

As long as matchAt does not return a pointer, escape analysis determines
that the matches can stay on the stack. This works in Go 1.17, too.

name                              old speed      new speed      delta
Encoder_EncodeAllSimple/best-8    13.5MB/s ± 3%  15.3MB/s ± 2%  +13.35%  (p=0.000 n=19+19)
Encoder_EncodeAllSimple4K/best-8  11.9MB/s ± 1%  12.9MB/s ± 0%   +8.38%  (p=0.000 n=19+17)

name                              old alloc/op   new alloc/op   delta
Encoder_EncodeAllSimple/best-8       16.0B ± 0%     14.0B ± 0%  -12.50%  (p=0.000 n=17+20)
Encoder_EncodeAllSimple4K/best-8     1.00B ± 0%     1.00B ± 0%     ~     (all equal)
  • Loading branch information
greatroar committed Dec 4, 2022
1 parent 48791b0 commit a0c1f61
Showing 1 changed file with 26 additions and 18 deletions.
44 changes: 26 additions & 18 deletions zstd/enc_best.go
Expand Up @@ -189,7 +189,7 @@ encodeLoop:
panic("offset0 was 0")
}

bestOf := func(a, b match) match {
bestOf := func(a, b *match) *match {
if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
return a
}
Expand All @@ -216,22 +216,26 @@ encodeLoop:
return m
}

best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))

if canRepeat && best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
}
}
// Load next and check...
Expand All @@ -258,16 +262,18 @@ encodeLoop:
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

// Short at s+1
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
// Long at s+1, s+2
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
if false {
// Short at s+3.
// Too often worse...
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
best = bestOf(best, &m)
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
Expand All @@ -278,9 +284,11 @@ encodeLoop:
// For this compression level 2 yields the best results.
const skipBeginning = 2
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd := bestOf(best, &m)
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
bestEnd = bestOf(bestEnd, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd = bestOf(bestEnd, &m)
}
best = bestEnd
}
Expand Down

0 comments on commit a0c1f61

Please sign in to comment.