diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index 25f6d1108f..7906e938dd 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -1009,8 +1009,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } } - // Fill is rarely better... - const fill = false const numLiterals = endBlockMarker + 1 const numOffsets = 1 @@ -1019,7 +1017,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { // Assume header is around 70 bytes: // https://stackoverflow.com/a/25454430 const guessHeaderSizeBits = 70 * 8 - histogram(input, w.literalFreq[:numLiterals], fill) + histogram(input, w.literalFreq[:numLiterals]) ssize, storable := w.storedSize(input) if storable && len(input) > 1024 { // Quick check for incompressible content. @@ -1045,19 +1043,14 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } w.literalFreq[endBlockMarker] = 1 w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) - if fill { - // Clear fill... - for i := range w.literalFreq[:numLiterals] { - w.literalFreq[i] = 0 - } - histogram(input, w.literalFreq[:numLiterals], false) - } estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) - estBits += w.lastHeader - if w.lastHeader == 0 { - estBits += guessHeaderSizeBits + if estBits < math.MaxInt32 { + estBits += w.lastHeader + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty } - estBits += estBits >> w.logNewTablePenalty // Store bytes, if we don't get a reasonable improvement. if storable && ssize <= estBits { diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 9ab497c275..c0bcdad5be 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -364,21 +364,37 @@ func atLeastOne(v float32) float32 { return v } -// Unassigned values are assigned '1' in the histogram. 
// histogram adds the number of occurrences of every byte value in b to h.
//
// Counts are accumulated on top of whatever h already holds, so callers that
// want a fresh histogram must zero h first. Only h[0:256] is written; h is
// resliced to 256 entries, which panics if its capacity is smaller than that.
// Counters are uint16 and wrap silently.
// NOTE(review): presumably callers bound len(b) so a counter cannot wrap —
// confirm against the call sites.
func histogram(b []byte, h []uint16) {
	// Inputs of at least this many bytes use the 4-way split counter.
	const splitMin = 8 << 10

	if len(b) >= splitMin {
		histogramSplit(b, h)
		return
	}
	// Reslicing to a fixed length gives every h[t] (t is a byte) a known
	// upper bound.
	h = h[:256]
	for _, t := range b {
		h[t]++
	}
}

// histogramSplit produces the same result as the plain loop in histogram,
// but walks four equally sized quarters of b in lock-step.
//
// Like histogram, it accumulates into h[0:256] and panics if h has capacity
// for fewer than 256 entries.
func histogramSplit(b []byte, h []uint16) {
	// Tested, and slightly faster than 2-way.
	// Writing to separate arrays and combining is also slightly slower.
	h = h[:256]

	// Count leading bytes one by one until the remainder is a multiple of 4.
	for len(b)&3 != 0 {
		h[b[0]]++
		b = b[1:]
	}

	// Split the remainder into four quarters of n bytes each.
	n := len(b) / 4
	x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
	// Trim every quarter to len(x) so one index is valid for all four.
	y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
	for i, t := range x {
		// Take all four counter addresses first, then increment. Two
		// pointers may refer to the same counter; each increment is a
		// full read-modify-write, so repeated values still count once
		// per occurrence.
		v0 := &h[t]
		v1 := &h[y[i]]
		v3 := &h[w[i]]
		v2 := &h[z[i]]
		*v0++
		*v1++
		*v2++
		*v3++
	}
}