diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index 0b2e54972c..d55ea2a775 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -179,7 +179,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 { // matchlenLong will return the match length between offsets and t in src. // It is assumed that s > t, that t >=0 and s < len(src). func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { - if debugDecode { + if debugDeflate { if t >= s { panic(fmt.Sprint("t >=s:", t, s)) } diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index fd49efd75b..31a9092d90 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -833,9 +833,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits, nbits, nbytes := w.bits, w.nbits, w.nbytes for _, t := range tokens { - if t < matchType { + if t < 256 { //w.writeCode(lits[t.literal()]) - c := lits[t.literal()] + c := lits[t] bits |= uint64(c.code) << (nbits & 63) nbits += c.len if nbits >= 48 { diff --git a/flate/level1.go b/flate/level1.go index 1e5eea3968..0022c8bb6b 100644 --- a/flate/level1.go +++ b/flate/level1.go @@ -1,6 +1,10 @@ package flate -import "fmt" +import ( + "encoding/binary" + "fmt" + "math/bits" +) // fastGen maintains the table for matches, // and the previous byte block for level 2. @@ -116,7 +120,32 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // Extend the 4-byte match as long as possible. t := candidate.offset - e.cur - l := e.matchlenLong(s+4, t+4, src) + 4 + var l = int32(4) + if false { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else { + // inlined: + a := src[s+4:] + b := src[t+4:] + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + l += int32(bits.TrailingZeros64(diff) >> 3) + break + } + l += 8 + a = a[8:] + b = b[8:] + } + if len(a) < 8 { + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + break + } + l++ + } + } + } // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { @@ -129,7 +158,28 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { } // Save the match found - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + if false { + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + } else { + // Inlined... + xoffset := uint32(s - t - baseMatchOffset) + xlength := l + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + xl = 258 - baseMatchLength + } + xlength -= xl + xl -= baseMatchLength + dst.extraHist[lengthCodes1[uint8(xl)]]++ + dst.offHist[oc]++ + dst.tokens[dst.n] = token(matchType | uint32(xl)<= s { diff --git a/flate/token.go b/flate/token.go index 3a9618ee19..ef69c05a1d 100644 --- a/flate/token.go +++ b/flate/token.go @@ -276,7 +276,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { xoffset |= oCode << 16 t.extraHist[lengthCodes1[uint8(xlength)]]++ - t.offHist[oCode]++ + t.offHist[oCode&31]++ t.tokens[t.n] = token(matchType | xlength<