Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

flate: Improve level 1 speed by ~10% #482

Merged
merged 1 commit into from Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion flate/fast_encoder.go
Expand Up @@ -179,7 +179,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
// matchlenLong will return the match length between offsets s and t in src.
// It is assumed that s > t, that t >= 0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
if debugDecode {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
Expand Down
4 changes: 2 additions & 2 deletions flate/huffman_bit_writer.go
Expand Up @@ -833,9 +833,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

for _, t := range tokens {
if t < matchType {
if t < 256 {
//w.writeCode(lits[t.literal()])
c := lits[t.literal()]
c := lits[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
Expand Down
56 changes: 53 additions & 3 deletions flate/level1.go
@@ -1,6 +1,10 @@
package flate

import "fmt"
import (
"encoding/binary"
"fmt"
"math/bits"
)

// fastGen maintains the table for matches,
// and the previous byte block for level 2.
Expand Down Expand Up @@ -116,7 +120,32 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {

// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
var l = int32(4)
if false {
l = e.matchlenLong(s+4, t+4, src) + 4
} else {
// inlined:
a := src[s+4:]
b := src[t+4:]
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
l += int32(bits.TrailingZeros64(diff) >> 3)
break
}
l += 8
a = a[8:]
b = b[8:]
}
if len(a) < 8 {
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
break
}
l++
}
}
}

// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
Expand All @@ -129,7 +158,28 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}

// Save the match found
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
if false {
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
} else {
// Inlined...
xoffset := uint32(s - t - baseMatchOffset)
xlength := l
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
xl = 258 - baseMatchLength
}
xlength -= xl
xl -= baseMatchLength
dst.extraHist[lengthCodes1[uint8(xl)]]++
dst.offHist[oc]++
dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
dst.n++
}
}
s += l
nextEmit = s
if nextS >= s {
Expand Down
4 changes: 2 additions & 2 deletions flate/token.go
Expand Up @@ -276,7 +276,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
xoffset |= oCode << 16

t.extraHist[lengthCodes1[uint8(xlength)]]++
t.offHist[oCode]++
t.offHist[oCode&31]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.n++
}
Expand All @@ -300,7 +300,7 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
xlength -= xl
xl -= baseMatchLength
t.extraHist[lengthCodes1[uint8(xl)]]++
t.offHist[oc]++
t.offHist[oc&31]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
Expand Down