diff --git a/zstd/enc_best.go b/zstd/enc_best.go index 830f5ba74a..07f657d36e 100644 --- a/zstd/enc_best.go +++ b/zstd/enc_best.go @@ -32,7 +32,6 @@ type match struct { length int32 rep int32 est int32 - _ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes } const highScore = 25000 @@ -189,12 +188,6 @@ encodeLoop: panic("offset0 was 0") } - bestOf := func(a, b *match) *match { - if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 { - return a - } - return b - } const goodEnough = 100 nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) @@ -202,40 +195,41 @@ encodeLoop: candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] - matchAt := func(offset int32, s int32, first uint32, rep int32) match { + // Set m to a match at offset if it looks like that will improve compression. + improve := func(m *match, offset int32, s int32, first uint32, rep int32) { if s-offset >= e.maxMatchOff || load3232(src, offset) != first { - return match{s: s, est: highScore} + return } if debugAsserts { if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) } } - m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} - m.estBits(bitsPerByte) - return m + cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + cand.estBits(bitsPerByte) + if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { + *m = cand + } } - m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) - m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) - m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) - m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1) - best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4)) + best := match{s: s, est: highScore} + improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1) if canRepeat && best.length < goodEnough { cv32 := uint32(cv >> 8) spp := s + 1 - m1 := matchAt(spp-offset1, spp, cv32, 1) - m2 := matchAt(spp-offset2, spp, cv32, 2) - m3 := matchAt(spp-offset3, spp, cv32, 3) - best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) if best.length > 0 { cv32 = uint32(cv >> 24) spp += 2 - m1 := matchAt(spp-offset1, spp, cv32, 1) - m2 := matchAt(spp-offset2, spp, cv32, 2) - m3 := matchAt(spp-offset3, spp, cv32, 3) - best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) } } // Load next and check... @@ -262,18 +256,16 @@ encodeLoop: candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] // Short at s+1 - m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) // Long at s+1, s+2 - m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) - m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) - m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1) - m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1) - best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5)) + improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) + improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1) + improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1) if false { // Short at s+3. // Too often worse... - m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) - best = bestOf(best, &m) + improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) } // See if we can find a better match by checking where the current best ends. // Use that offset to see if we can find a better full match. @@ -284,13 +276,10 @@ encodeLoop: // For this compression level 2 yields the best results. const skipBeginning = 2 if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 { - m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - bestEnd := bestOf(best, &m) + improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 { - m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - bestEnd = bestOf(bestEnd, &m) + improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) } - best = bestEnd } } }