From 6caf5f7431ecc1a2f3b82665632e235407fa6c1b Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 8 Jun 2022 12:02:52 +0200 Subject: [PATCH] deflate: Use compound hcode ~5% Faster for huffman-only, same for rest. ``` github-ranks-backup.bin gzkp -2 1862623243 1298789681 5547 320.22 github-ranks-backup.bin gzkp -2 1862623243 1298789681 5305 334.83 ``` Replaces #619 - more speedup, and no regression. --- flate/huffman_bit_writer.go | 47 +++++++++++++++++++------------------ flate/huffman_code.go | 38 ++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 36 deletions(-) diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index 25f6d1108f..ff9f6f609a 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -169,7 +169,7 @@ func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { b := w.offsetEncoding.codes b = b[:len(a)] for i, v := range a { - if v != 0 && b[i].len == 0 { + if v != 0 && b[i].zero() { return false } } @@ -178,7 +178,7 @@ func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { b = w.literalEncoding.codes[256:literalCount] b = b[:len(a)] for i, v := range a { - if v != 0 && b[i].len == 0 { + if v != 0 && b[i].zero() { return false } } @@ -186,7 +186,7 @@ func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { a = t.litHist[:256] b = w.literalEncoding.codes[:len(a)] for i, v := range a { - if v != 0 && b[i].len == 0 { + if v != 0 && b[i].zero() { return false } } @@ -280,12 +280,12 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE // Copy the concatenated code sizes to codegen. Put a marker at the end. cgnl := codegen[:numLiterals] for i := range cgnl { - cgnl[i] = uint8(litEnc.codes[i].len) + cgnl[i] = litEnc.codes[i].len() } cgnl = codegen[numLiterals : numLiterals+numOffsets] for i := range cgnl { - cgnl[i] = uint8(offEnc.codes[i].len) + cgnl[i] = offEnc.codes[i].len() } codegen[numLiterals+numOffsets] = badCode @@ -428,8 +428,8 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { func (w *huffmanBitWriter) writeCode(c hcode) { // The function does not get inlined if we "& 63" the shift. - w.bits |= uint64(c.code) << (w.nbits & 63) - w.nbits += c.len + w.bits |= c.code64() << (w.nbits & 63) + w.nbits += c.len() if w.nbits >= 48 { w.writeOutBits() } @@ -477,7 +477,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n w.writeBits(int32(numCodegens-4), 4) for i := 0; i < numCodegens; i++ { - value := uint(w.codegenEncoding.codes[codegenOrder[i]].len) + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) w.writeBits(int32(value), 3) } @@ -670,7 +670,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b // Estimate size for using a new table. // Use the previous header size as the best estimate. newSize := w.lastHeader + tokens.EstimatedBits() - newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty + newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty // The estimated size is calculated as an optimal table. // We add a penalty to make it more realistic and re-use a bit more. @@ -854,8 +854,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) if t < 256 { //w.writeCode(lits[t.literal()]) c := lits[t] - bits |= uint64(c.code) << (nbits & 63) - nbits += c.len + bits |= c.code64() << (nbits & 63) + nbits += c.len() if nbits >= 48 { binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits @@ -882,8 +882,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) } else { // inlined c := lengths[lengthCode] - bits |= uint64(c.code) << (nbits & 63) - nbits += c.len + bits |= c.code64() << (nbits & 63) + nbits += c.len() if nbits >= 48 { binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits @@ -931,8 +931,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) } else { // inlined c := offs[offsetCode] - bits |= uint64(c.code) << (nbits & 63) - nbits += c.len + bits |= c.code64() << (nbits & 63) + nbits += c.len() if nbits >= 48 { binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits @@ -1134,12 +1134,12 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { nbytes = 0 } a, b := encoding[input[0]], encoding[input[1]] - bits |= uint64(a.code) << (nbits & 63) - bits |= uint64(b.code) << ((nbits + a.len) & 63) + bits |= a.code64() << (nbits & 63) + bits |= b.code64() << ((nbits + a.len()) & 63) c := encoding[input[2]] - nbits += b.len + a.len - bits |= uint64(c.code) << (nbits & 63) - nbits += c.len + nbits += b.len() + a.len() + bits |= c.code64() << (nbits & 63) + nbits += c.len() input = input[3:] } @@ -1165,10 +1165,11 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } // Bitwriting inlined, ~30% speedup c := encoding[t] - bits |= uint64(c.code) << (nbits & 63) - nbits += c.len + bits |= c.code64() << (nbits & 63) + + nbits += c.len() if debugDeflate { - count += int(c.len) + count += int(c.len()) } } // Restore... diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 9ab497c275..e0483bef3f 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -16,9 +16,18 @@ const ( ) // hcode is a huffman code with a bit code and bit length. -type hcode struct { - code uint16 - len uint8 +type hcode uint32 + +func (h hcode) len() uint8 { + return uint8(h) +} + +func (h hcode) code64() uint64 { + return uint64(h >> 8) +} + +func (h hcode) zero() bool { + return h == 0 } type huffmanEncoder struct { @@ -58,8 +67,11 @@ type levelInfo struct { // set sets the code and length of an hcode. func (h *hcode) set(code uint16, length uint8) { - h.len = length - h.code = code + *h = hcode(length) | (hcode(code) << 8) +} + +func newhcode(code uint16, length uint8) hcode { + return hcode(length) | (hcode(code) << 8) } func reverseBits(number uint16, bitLength byte) uint16 { @@ -100,7 +112,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder { bits = ch + 192 - 280 size = 8 } - codes[ch] = hcode{code: reverseBits(bits, size), len: size} + codes[ch] = newhcode(reverseBits(bits, size), size) } return h } @@ -109,7 +121,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder { h := newHuffmanEncoder(30) codes := h.codes for ch := range codes { - codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5} + codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5) } return h } @@ -121,7 +133,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { var total int for i, f := range freq { if f != 0 { - total += int(f) * int(h.codes[i].len) + total += int(f) * int(h.codes[i].len()) } } return total @@ -130,7 +142,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { func (h *huffmanEncoder) bitLengthRaw(b []byte) int { var total int for _, f := range b { - total += int(h.codes[f].len) + total += int(h.codes[f].len()) } return total } @@ -141,10 +153,10 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int { for i, f := range freq { if f != 0 { code := h.codes[i] - if code.len == 0 { + if code.zero() { return math.MaxInt32 } - total += int(f) * int(code.len) + total += int(f) * int(code.len()) } } return total @@ -308,7 +320,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN sortByLiteral(chunk) for _, node := range chunk { - h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)} + h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) code++ } list = list[0 : len(list)-int(bits)] @@ -330,7 +342,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { list[count] = literalNode{uint16(i), f} count++ } else { - codes[i].len = 0 + codes[i] = 0 } } list[count] = literalNode{}