Skip to content

Commit

Permalink
Improve incompressible data speed (#491)
Browse files Browse the repository at this point in the history
Improve speed for zstd fastest and huffman-only compression of random data.

flate: 590.71MB/s -> 1512.99MB/s
zstd l1: 1840.07MB/s -> 2489.23MB/s
  • Loading branch information
klauspost committed Feb 7, 2022
1 parent 43829fc commit b5a73a1
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
21 changes: 20 additions & 1 deletion flate/huffman_bit_writer.go
Expand Up @@ -1008,6 +1008,26 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
histogram(input, w.literalFreq[:numLiterals], fill)
ssize, storable := w.storedSize(input)
if storable && len(input) > 1024 {
// Quick check for incompressible content.
abs := float64(0)
avg := float64(len(input)) / 256
max := float64(len(input) * 2)
for _, v := range w.literalFreq[:256] {
diff := float64(v) - avg
abs += diff * diff
if abs > max {
break
}
}
if abs < max {
// No chance we can compress this...
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
}
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
Expand All @@ -1025,7 +1045,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
estBits += estBits >> w.logNewTablePenalty

// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize <= estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
Expand Down
4 changes: 2 additions & 2 deletions zstd/enc_fast.go
Expand Up @@ -85,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 7
const kSearchStrength = 6

// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
Expand Down Expand Up @@ -334,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 8
const kSearchStrength = 6

// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
Expand Down

0 comments on commit b5a73a1

Please sign in to comment.