From 43829fcbc2c042d8da29a97f784ab22ca44581ae Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 5 Feb 2022 04:36:01 -0800 Subject: [PATCH] flate: Improve huffman generation speed ~5-10% faster (#490) Mainly gains for small blocks & fastest levels. ``` benchmark old ns/op new ns/op delta BenchmarkEncodeDigitsConstant1e4-32 16080 15984 -0.60% BenchmarkEncodeDigitsSpeed1e4-32 48105 45890 -4.60% BenchmarkEncodeDigitsDefault1e4-32 114016 111234 -2.44% BenchmarkEncodeDigitsCompress1e4-32 228570 225041 -1.54% BenchmarkEncodeDigitsSL1e4-32 46695 45006 -3.62% BenchmarkEncodeTwainConstant1e4-32 23557 22033 -6.47% BenchmarkEncodeTwainSpeed1e4-32 68594 63177 -7.90% BenchmarkEncodeTwainDefault1e4-32 112498 111798 -0.62% BenchmarkEncodeTwainCompress1e4-32 282909 274804 -2.86% BenchmarkEncodeTwainSL1e4-32 68406 62362 -8.84% benchmark old MB/s new MB/s speedup BenchmarkEncodeDigitsConstant1e4-32 621.89 625.64 1.01x BenchmarkEncodeDigitsSpeed1e4-32 207.88 217.91 1.05x BenchmarkEncodeDigitsDefault1e4-32 87.71 89.90 1.02x BenchmarkEncodeDigitsCompress1e4-32 43.75 44.44 1.02x BenchmarkEncodeDigitsSL1e4-32 214.16 222.19 1.04x BenchmarkEncodeTwainConstant1e4-32 424.51 453.87 1.07x BenchmarkEncodeTwainSpeed1e4-32 145.79 158.29 1.09x BenchmarkEncodeTwainDefault1e4-32 88.89 89.45 1.01x BenchmarkEncodeTwainCompress1e4-32 35.35 36.39 1.03x BenchmarkEncodeTwainSL1e4-32 146.19 160.35 1.10x ``` (output is byte exact) --- flate/huffman_code.go | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 9989830b53..9ab497c275 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -188,14 +188,19 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // of the level j ancestor. var leafCounts [maxBitsLimit][maxBitsLimit]int32 + // Descending to only have 1 bounds check. + l2f := int32(list[2].freq) + l1f := int32(list[1].freq) + l0f := int32(list[0].freq) + int32(list[1].freq) + for level := int32(1); level <= maxBits; level++ { // For every level, the first two items are the first two characters. // We initialize the levels as if we had already figured this out. levels[level] = levelInfo{ level: level, - lastFreq: int32(list[1].freq), - nextCharFreq: int32(list[2].freq), - nextPairFreq: int32(list[0].freq) + int32(list[1].freq), + lastFreq: l1f, + nextCharFreq: l2f, + nextPairFreq: l0f, } leafCounts[level][level] = 2 if level == 1 { @@ -206,8 +211,8 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // We need a total of 2*n - 2 items at top level and have already generated 2. levels[maxBits].needed = 2*n - 4 - level := maxBits - for { + level := uint32(maxBits) + for level < 16 { l := &levels[level] if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { // We've run out of both leafs and pairs. @@ -239,7 +244,13 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // more values in the level below l.lastFreq = l.nextPairFreq // Take leaf counts from the lower level, except counts[level] remains the same. - copy(leafCounts[level][:level], leafCounts[level-1][:level]) + if true { + save := leafCounts[level][level] + leafCounts[level] = leafCounts[level-1] + leafCounts[level][level] = save + } else { + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + } levels[l.level-1].needed = 2 } @@ -310,6 +321,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // maxBits The maximum number of bits to use for any literal. func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { list := h.freqcache[:len(freq)+1] + codes := h.codes[:len(freq)] // Number of non-zero literals count := 0 // Set list to be the set of all non-zero literals and their frequencies @@ -318,11 +330,10 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { list[count] = literalNode{uint16(i), f} count++ } else { - list[count] = literalNode{} - h.codes[i].len = 0 + codes[i].len = 0 } } - list[len(freq)] = literalNode{} + list[count] = literalNode{} list = list[:count] if count <= 2 {