Skip to content

Commit

Permalink
flate: Faster token writer, 1-2% on fastest (#489)
Browse files Browse the repository at this point in the history
* flate: Faster token writer
* Move masking.
* Update docs
  • Loading branch information
klauspost committed Feb 5, 2022
1 parent a1a9cfc commit 24a2710
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 30 deletions.
42 changes: 24 additions & 18 deletions flate/huffman_bit_writer.go
Expand Up @@ -36,8 +36,11 @@ const (
bufferSize = bufferFlushSize + 8
)

// lengthExtraBitsMinCode is the smallest index into lengthExtraBits
// (length code minus 257) whose entry is non-zero. Indexes below it have
// zero extra bits, so the token writer compares the length code against
// this constant and only loads the table entry and emits extra bits when
// the code is at or above it.
const lengthExtraBitsMinCode = 8

// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = [32]int8{
var lengthExtraBits = [32]uint8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
Expand All @@ -51,6 +54,9 @@ var lengthBase = [32]uint8{
64, 80, 96, 112, 128, 160, 192, 224, 255,
}

// offsetExtraBitsMinCode is the smallest index into offsetExtraBits whose
// entry is non-zero. Offset codes below it have zero extra bits, so the
// token writer compares the offset code against this constant and only
// looks up offsetCombined and emits extra bits when the code is at or
// above it.
const offsetExtraBitsMinCode = 4

// offset code word extra bits.
var offsetExtraBits = [32]int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
Expand Down Expand Up @@ -78,10 +84,10 @@ func init() {

for i := range offsetCombined[:] {
// Don't use extended window values...
if offsetBase[i] > 0x006000 {
if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
continue
}
offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
}
}

Expand All @@ -97,7 +103,7 @@ type huffmanBitWriter struct {
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
nbits uint16
nbits uint8
nbytes uint8
lastHuffMan bool
literalEncoding *huffmanEncoder
Expand Down Expand Up @@ -215,7 +221,7 @@ func (w *huffmanBitWriter) write(b []byte) {
_, w.err = w.writer.Write(b)
}

func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
Expand Down Expand Up @@ -858,12 +864,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)

// Write the length
length := t.length()
lengthCode := lengthCode(length)
lengthCode := lengthCode(length) & 31
if false {
w.writeCode(lengths[lengthCode&31])
w.writeCode(lengths[lengthCode])
} else {
// inlined
c := lengths[lengthCode&31]
c := lengths[lengthCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
Expand All @@ -883,10 +889,10 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
}

extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
if lengthCode >= lengthExtraBitsMinCode {
extraLengthBits := lengthExtraBits[lengthCode]
//w.writeBits(extraLength, extraLengthBits)
extraLength := int32(length - lengthBase[lengthCode&31])
extraLength := int32(length - lengthBase[lengthCode])
bits |= uint64(extraLength) << (nbits & 63)
nbits += extraLengthBits
if nbits >= 48 {
Expand All @@ -907,10 +913,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
// Write the offset
offset := t.offset()
offsetCode := offset >> 16
offset &= matchOffsetOnlyMask
offsetCode := (offset >> 16) & 31
if false {
w.writeCode(offs[offsetCode&31])
w.writeCode(offs[offsetCode])
} else {
// inlined
c := offs[offsetCode]
Expand All @@ -932,11 +937,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
}
}
offsetComb := offsetCombined[offsetCode]
if offsetComb > 1<<16 {

if offsetCode >= offsetExtraBitsMinCode {
offsetComb := offsetCombined[offsetCode]
//w.writeBits(extraOffset, extraOffsetBits)
bits |= uint64(offset-(offsetComb&0xffff)) << (nbits & 63)
nbits += uint16(offsetComb >> 16)
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
nbits += uint8(offsetComb)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
Expand Down
11 changes: 6 additions & 5 deletions flate/huffman_code.go
Expand Up @@ -17,7 +17,8 @@ const (

// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
code uint16
len uint8
}

type huffmanEncoder struct {
Expand Down Expand Up @@ -56,7 +57,7 @@ type levelInfo struct {
}

// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
func (h *hcode) set(code uint16, length uint8) {
h.len = length
h.code = code
}
Expand All @@ -80,7 +81,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint16
var size uint8
switch {
case ch < 144:
// size 8, 00110000 .. 10111111
Expand All @@ -99,7 +100,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
codes[ch] = hcode{code: reverseBits(bits, size), len: size}
}
return h
}
Expand Down Expand Up @@ -296,7 +297,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN

sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)}
code++
}
list = list[0 : len(list)-int(bits)]
Expand Down
13 changes: 6 additions & 7 deletions flate/token.go
Expand Up @@ -13,11 +13,10 @@ import (
)

const (
// From top
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 5 bits offsetcode
// 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
// bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
// bits 16-22 offsetcode - 5 bits
// bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
// bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
Expand Down Expand Up @@ -356,8 +355,8 @@ func (t token) offset() uint32 { return uint32(t) & offsetMask }

// length extracts the length field of the token: the bits at and above
// lengthShift, truncated to the low 8 bits by the uint8 conversion.
func (t token) length() uint8 {
	shifted := t >> lengthShift
	return uint8(shifted)
}

// The code is never more than 8 bits, but is returned as uint32 for convenience.
func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
// Convert length to code.
func lengthCode(len uint8) uint8 { return lengthCodes[len] }

// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
Expand Down

0 comments on commit 24a2710

Please sign in to comment.