deflate: Separate code and lengths #619

Closed
wants to merge 1 commit
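This change switches the Huffman tables from an array of per-symbol structs to a struct-of-arrays layout: the diff below reads codes and code lengths through `codes.code[i]` and `codes.len[i]` instead of `codes[i].code` and `codes[i].len`, and `writeCode` now takes the table plus a symbol index. The sketch below illustrates the layout difference only; the element types and field names outside what the diff shows are assumptions, not the repository's actual definitions.

```go
// Sketch of the layout change, with assumed element types.
package main

import "fmt"

// Before: array of structs, one hcode per symbol.
type hcodeAoS struct {
	code uint16
	len  uint8
}

// After: struct of arrays, codes and lengths in separate parallel slices,
// so passes that only need lengths (canReuse, generateCodegen, the dynamic
// header) scan a dense byte slice instead of striding over structs.
type hcodeSoA struct {
	code []uint16
	len  []uint8
}

func main() {
	aos := []hcodeAoS{{code: 0b10, len: 2}, {code: 0b110, len: 3}}
	soa := hcodeSoA{code: []uint16{0b10, 0b110}, len: []uint8{2, 3}}
	// Same information, different memory layout.
	fmt.Println(aos[1].code, aos[1].len) // 6 3
	fmt.Println(soa.code[1], soa.len[1]) // 6 3
}
```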
163 changes: 79 additions & 84 deletions flate/huffman_bit_writer.go
@@ -166,27 +166,27 @@ func (w *huffmanBitWriter) reset(writer io.Writer) {

func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
a := t.offHist[:offsetCodeCount]
b := w.offsetEncoding.codes
b := w.offsetEncoding.codes.len
b = b[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i] == 0 {
return false
}
}

a = t.extraHist[:literalCount-256]
b = w.literalEncoding.codes[256:literalCount]
b = w.literalEncoding.codes.len[256:literalCount]
b = b[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i] == 0 {
return false
}
}

a = t.litHist[:256]
b = w.literalEncoding.codes[:len(a)]
b = w.literalEncoding.codes.len[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i] == 0 {
return false
}
}
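The reuse condition itself is unchanged: a previously emitted table can only be reused if every symbol that occurs in the new block (nonzero histogram entry) was assigned a code (nonzero length) by the old table. Only the access pattern moves from `codes[i].len` to the new lengths slice. A minimal standalone sketch of that check, with hypothetical names:

```go
package main

import "fmt"

// canReuseTable reports whether a Huffman table whose per-symbol code
// lengths are given in lens can encode a block with histogram hist.
// A zero length means the symbol has no code, so any symbol that
// actually occurs (hist[i] != 0) rules the table out.
func canReuseTable(hist []uint16, lens []uint8) bool {
	lens = lens[:len(hist)] // re-slice once, as the diff does, to help drop bounds checks
	for i, count := range hist {
		if count != 0 && lens[i] == 0 {
			return false
		}
	}
	return true
}

func main() {
	lens := []uint8{3, 0, 2}                            // symbol 1 has no code
	fmt.Println(canReuseTable([]uint16{5, 0, 1}, lens)) // true
	fmt.Println(canReuseTable([]uint16{5, 2, 1}, lens)) // false: symbol 1 occurs
}
```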
@@ -200,7 +200,7 @@ func (w *huffmanBitWriter) flush() {
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
}
n := w.nbytes
@@ -280,12 +280,12 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
cgnl[i] = litEnc.codes.len[i]
}

cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
cgnl[i] = offEnc.codes.len[i]
}
codegen[numLiterals+numOffsets] = badCode

@@ -426,10 +426,10 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
return 0, false
}

func (w *huffmanBitWriter) writeCode(c hcode) {
func (w *huffmanBitWriter) writeCode(c hcode, idx int) {
// The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
w.bits |= uint64(c.code[idx]) << (w.nbits & 63)
w.nbits += c.len[idx]
if w.nbits >= 48 {
w.writeOutBits()
}
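With the new signature, callers pass the whole table and a symbol index, e.g. `w.writeCode(w.literalEncoding.codes, endBlockMarker)`, and the code and length are read from the two parallel arrays. The sketch below imitates the accumulation step under the assumption the diff shows elsewhere: a 64-bit bit buffer spilled 48 bits (6 bytes) at a time. The type and its fields are stand-ins, not the real `huffmanBitWriter`.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// bitWriter is a stripped-down stand-in for huffmanBitWriter:
// codes/lens are the parallel arrays, bits/nbits the pending bit buffer.
type bitWriter struct {
	codes []uint16
	lens  []uint8
	bits  uint64
	nbits uint8
	out   []byte
}

// writeCode mirrors the new (table, index) form: look up code and length
// by symbol index and OR the code into the 64-bit accumulator.
func (w *bitWriter) writeCode(idx int) {
	w.bits |= uint64(w.codes[idx]) << (w.nbits & 63) // masked shift, matching the diff
	w.nbits += w.lens[idx]
	if w.nbits >= 48 { // 48 of 64 bits pending: spill 6 whole bytes
		var buf [8]byte
		binary.LittleEndian.PutUint64(buf[:], w.bits)
		w.out = append(w.out, buf[:6]...)
		w.bits >>= 48
		w.nbits -= 48
	}
}

func main() {
	w := &bitWriter{codes: []uint16{0b0, 0b10, 0b110}, lens: []uint8{1, 2, 3}}
	for _, sym := range []int{2, 1, 0} {
		w.writeCode(sym)
	}
	fmt.Printf("pending bits=%06b nbits=%d\n", w.bits, w.nbits) // 3+2+1 = 6 bits pending
}
```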
@@ -477,18 +477,18 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
w.writeBits(int32(numCodegens-4), 4)

for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
value := uint(w.codegenEncoding.codes.len[codegenOrder[i]])
w.writeBits(int32(value), 3)
}

i := 0
for {
var codeWord = uint32(w.codegen[i])
var codeWord = int(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[codeWord])
w.writeCode(w.codegenEncoding.codes, codeWord)

switch codeWord {
case 16:
@@ -513,7 +513,7 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
}

@@ -542,7 +542,7 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
}

@@ -567,7 +567,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
tokens.AddEOB()
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, false)
@@ -638,7 +638,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
// We cannot reuse pure huffman table, and must mark as EOF.
if (w.lastHuffMan || eof) && w.lastHeader > 0 {
// We will not try to reuse.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
w.lastHuffMan = false
}
@@ -650,7 +650,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b

// Check if we can reuse...
if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
}

@@ -670,7 +670,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty
newSize += int(w.literalEncoding.codes.len[endBlockMarker]) + newSize>>w.logNewTablePenalty

// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
@@ -679,7 +679,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
// Check if a new table is better.
if newSize < reuseSize {
// Write the EOB we owe.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
size = newSize
w.lastHeader = 0
} else {
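The new-table estimate above gets an end-of-block code plus a right-shift penalty of `newSize>>w.logNewTablePenalty`, so for example a penalty exponent of 4 inflates the estimate by roughly 6 percent and biases the decision toward reusing the previous table. A small sketch of that comparison; the concrete numbers and the function name are illustrative only, not values from this file:

```go
package main

import "fmt"

// chooseTable mirrors the reuse-vs-new decision: the new-table estimate
// gets an EOB code plus a right-shift penalty so that reuse wins unless
// a fresh table is clearly smaller.
func chooseTable(headerBits, tokenBits, eobLen, reuseBits, logPenalty int) (newBits int, useNew bool) {
	newBits = headerBits + tokenBits
	newBits += eobLen + newBits>>logPenalty // e.g. logPenalty=4 adds ~6.25%
	return newBits, newBits < reuseBits
}

func main() {
	// Illustrative numbers only: a 400-bit header plus 10000 estimated token
	// bits, versus 10900 bits if the previous table is reused.
	newBits, useNew := chooseTable(400, 10000, 7, 10900, 4)
	fmt.Println(newBits, useNew) // 11057 false -> keep reusing the old table
}
```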
@@ -826,7 +826,7 @@ func (w *huffmanBitWriter) generate() {

// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes hcode) {
if w.err != nil {
return
}
@@ -842,20 +842,23 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}

// Create slices up to the next power of two to avoid bounds checks.
lits := leCodes[:256]
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]

litLens := leCodes.len[:256]
litCodes := leCodes.code[:256]
offLens := oeCodes.len[:32]
offCodes := oeCodes.code[:32]

llLengths := leCodes.len[lengthCodesStart:]
llLengths = llLengths[:32]
llCodes := leCodes.code[lengthCodesStart:]
llCodes = llCodes[:32]
// Go 1.16 LOVES having these on stack.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

for _, t := range tokens {
if t < 256 {
//w.writeCode(lits[t.literal()])
c := lits[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= uint64(litCodes[t]) << (nbits & 63)
nbits += litLens[t]
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
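Re-slicing each table to a fixed power-of-two length (256 literals, 32 offsets, 32 length codes) up front lets the compiler see that an index already masked to that range, such as `lengthCode(length) & 31`, can never be out of bounds, so the checks disappear from the hot loop; copying `w.bits`, `w.nbits`, `w.nbytes` into locals keeps them in registers instead of reloading through the receiver. A minimal standalone sketch of that bounds-check-elimination pattern, with hypothetical names:

```go
package main

import "fmt"

// sumMasked shows the pattern used in writeTokens: re-slice the table to
// exactly 32 entries once, then index it with a value masked by & 31,
// so the compiler can prove every access is in range.
func sumMasked(table []uint8, symbols []uint16) int {
	table = table[:32] // one length check here instead of one per iteration
	total := 0         // hot state kept in a local
	for _, s := range symbols {
		total += int(table[s&31]) // no bounds check: s&31 < 32 == len(table)
	}
	return total
}

func main() {
	table := make([]uint8, 64)
	for i := range table {
		table[i] = uint8(i)
	}
	fmt.Println(sumMasked(table, []uint16{1, 33, 5})) // 1 + 1 + 5 = 7
}
```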
@@ -877,27 +880,23 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
// Write the length
length := t.length()
lengthCode := lengthCode(length) & 31
if false {
w.writeCode(lengths[lengthCode])
} else {
// inlined
c := lengths[lengthCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])

// inlined
bits |= uint64(llCodes[lengthCode]) << (nbits & 63)
nbits += llLengths[lengthCode]
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}

@@ -926,27 +925,22 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
// Write the offset
offset := t.offset()
offsetCode := (offset >> 16) & 31
if false {
w.writeCode(offs[offsetCode])
} else {
// inlined
c := offs[offsetCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
// inlined
bits |= uint64(offCodes[offsetCode]) << (nbits & 63)
nbits += offLens[offsetCode]
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}

@@ -976,7 +970,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes

if deferEOB {
w.writeCode(leCodes[endBlockMarker])
w.writeCode(leCodes, endBlockMarker)
}
}

@@ -1077,7 +1071,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
}
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
} else if debugDeflate {
fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
@@ -1104,7 +1098,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}

encoding := w.literalEncoding.codes[:256]
encodingLens := w.literalEncoding.codes.len[:256]
encodingCodes := w.literalEncoding.codes.code[:256]

// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

@@ -1133,13 +1129,13 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
a, b := encoding[input[0]], encoding[input[1]]
bits |= uint64(a.code) << (nbits & 63)
bits |= uint64(b.code) << ((nbits + a.len) & 63)
c := encoding[input[2]]
nbits += b.len + a.len
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
c, b, a := input[2], input[1], input[0]
aL, bL := encodingLens[a], encodingLens[b]
bits |= uint64(encodingCodes[a]) << (nbits & 63)
bits |= uint64(encodingCodes[b]) << ((nbits + aL) & 63)
nbits += aL + bL
bits |= uint64(encodingCodes[c]) << (nbits & 63)
nbits += encodingLens[c]
input = input[3:]
}
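In the three-literal fast path above, the second code is shifted by `(nbits + aL)` before `nbits` is updated, so both table loads and ORs run back to back with a single length update afterwards. A small standalone sketch of that pairing; the function name and the example codes are hypothetical:

```go
package main

import "fmt"

// emitPair ORs two Huffman codes into the accumulator, deferring the
// single nbits update until both shifts are computed, as the fast path
// in writeBlockHuff does for input[0] and input[1].
func emitPair(bits uint64, nbits uint8, codeA, codeB uint16, lenA, lenB uint8) (uint64, uint8) {
	bits |= uint64(codeA) << (nbits & 63)
	bits |= uint64(codeB) << ((nbits + lenA) & 63) // shift past codeA without touching nbits yet
	nbits += lenA + lenB
	return bits, nbits
}

func main() {
	// codeA = 0b101 (3 bits), codeB = 0b01 (2 bits), starting from an empty buffer.
	bits, nbits := emitPair(0, 0, 0b101, 0b01, 3, 2)
	fmt.Printf("%05b %d\n", bits, nbits) // 01101 5: codeB occupies the bits above codeA
}
```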

Expand All @@ -1164,11 +1160,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}
// Bitwriting inlined, ~30% speedup
c := encoding[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= uint64(encodingCodes[t]) << (nbits & 63)
nbits += encodingLens[t]
if debugDeflate {
count += int(c.len)
count += int(encodingLens[t])
}
}
// Restore...
@@ -1184,7 +1179,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}

if eof || sync {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.writeCode(w.literalEncoding.codes, endBlockMarker)
w.lastHeader = 0
w.lastHuffMan = false
}