diff --git a/flate/deflate.go b/flate/deflate.go index b27f5a93bc..1e9e41e315 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -10,9 +10,6 @@ import ( "fmt" "io" "math" - "math/bits" - - comp "github.com/klauspost/compress" ) const ( @@ -110,6 +107,7 @@ type advancedState struct { type compressor struct { compressionLevel + h *huffmanEncoder w *huffmanBitWriter // compression algorithm @@ -271,7 +269,7 @@ func (d *compressor) fillWindow(b []byte) { // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. // pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (length, offset int, ok bool) { +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { minMatchLook := maxMatchLength if lookahead < minMatchLook { minMatchLook = lookahead @@ -297,14 +295,19 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (lengt } offset = 0 + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 6 // Base is 4 bytes at with an additional cost. // Matches must be better than this. - cGain := minMatchLength*bpb - 12 + cGain := 0 for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) if n > length { - newGain := n*bpb - bits.Len32(uint32(pos-i)) + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) if newGain > cGain { length = n offset = pos - i @@ -389,10 +392,17 @@ func (d *compressor) deflateLazy() { if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return } - s.estBitsPerByte = 8 - if !d.sync { - s.estBitsPerByte = comp.ShannonEntropyBits(d.window[s.index:d.windowEnd]) - s.estBitsPerByte = int(1 + float64(s.estBitsPerByte)/float64(d.windowEnd-s.index)) + if d.windowEnd != s.index { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + for _, v := range d.window[s.index:d.windowEnd] { + tmp[v]++ + } + d.h.generate(tmp[:], 15) + //s.estBitsPerByte = d.h.bitLength(tmp[:])/(d.windowEnd-s.index) + 1 } s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) @@ -446,7 +456,7 @@ func (d *compressor) deflateLazy() { } if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead, s.estBitsPerByte); ok { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { s.length = newLength s.offset = newOffset } diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index fb1701eecc..fd49efd75b 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -52,18 +52,18 @@ var lengthBase = [32]uint8{ } // offset code word extra bits. -var offsetExtraBits = [64]int8{ +var offsetExtraBits = [32]int8{ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, /* extended window */ - 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, + 14, 14, } var offsetCombined = [32]uint32{} func init() { - var offsetBase = [64]uint32{ + var offsetBase = [32]uint32{ /* normal deflate */ 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, @@ -73,9 +73,7 @@ func init() { 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, /* extended window */ - 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, - 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, - 0x100000, 0x180000, 0x200000, 0x300000, + 0x008000, 0x00c000, } for i := range offsetCombined[:] { diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 67b2b38728..f35e00261d 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -129,9 +129,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { func (h *huffmanEncoder) bitLengthRaw(b []byte) int { var total int for _, f := range b { - if f != 0 { - total += int(h.codes[f].len) - } + total += int(h.codes[f].len) } return total } diff --git a/flate/token.go b/flate/token.go index eb862d7a92..3a9618ee19 100644 --- a/flate/token.go +++ b/flate/token.go @@ -129,11 +129,11 @@ var offsetCodes14 = [256]uint32{ type token uint32 type tokens struct { - nLits int extraHist [32]uint16 // codes 256->maxnumlit offHist [32]uint16 // offset codes litHist [256]uint16 // codes 0->255 - n uint16 // Must be able to contain maxStoreBlockSize + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize tokens [maxStoreBlockSize + 1]token } @@ -142,7 +142,7 @@ func (t *tokens) Reset() { return } t.n = 0 - t.nLits = 0 + t.nFilled = 0 for i := range t.litHist[:] { t.litHist[i] = 0 } @@ -161,12 +161,12 @@ func (t *tokens) Fill() { for i, v := range t.litHist[:] { if v == 0 { t.litHist[i] = 1 - t.nLits++ + t.nFilled++ } } for i, v := range t.extraHist[:literalCount-256] { if v == 0 { - t.nLits++ + t.nFilled++ t.extraHist[i] = 1 } } @@ -202,14 +202,12 @@ func emitLiteral(dst *tokens, lit []byte) { dst.litHist[v]++ } dst.n += uint16(len(lit)) - dst.nLits += len(lit) } func (t *tokens) AddLiteral(lit byte) { t.tokens[t.n] = token(lit) t.litHist[lit]++ t.n++ - t.nLits++ } // from https://stackoverflow.com/a/28730362 @@ -230,8 +228,9 @@ func (t *tokens) EstimatedBits() int { shannon := float32(0) bits := int(0) nMatches := 0 - if t.nLits > 0 { - invTotal := 1.0 / float32(t.nLits) + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) for _, v := range t.litHist[:] { if v > 0 { n := float32(v) @@ -275,7 +274,6 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { } oCode := offsetCode(xoffset) xoffset |= oCode << 16 - t.nLits++ t.extraHist[lengthCodes1[uint8(xlength)]]++ t.offHist[oCode]++ @@ -301,7 +299,6 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { } xlength -= xl xl -= baseMatchLength - t.nLits++ t.extraHist[lengthCodes1[uint8(xl)]]++ t.offHist[oc]++ t.tokens[t.n] = token(matchType | uint32(xl)<