Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve incompressible data speed #491

Merged
merged 1 commit into from Feb 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 20 additions & 1 deletion flate/huffman_bit_writer.go
Expand Up @@ -1008,6 +1008,26 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
histogram(input, w.literalFreq[:numLiterals], fill)
+ssize, storable := w.storedSize(input)
+if storable && len(input) > 1024 {
+// Quick check for incompressible content.
+abs := float64(0)
+avg := float64(len(input)) / 256
+max := float64(len(input) * 2)
+for _, v := range w.literalFreq[:256] {
+diff := float64(v) - avg
+abs += diff * diff
+if abs > max {
+break
+}
+}
+if abs < max {
+// No chance we can compress this...
+w.writeStoredHeader(len(input), eof)
+w.writeBytes(input)
+return
+}
+}
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
Expand All @@ -1025,7 +1045,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
estBits += estBits >> w.logNewTablePenalty

// Store bytes, if we don't get a reasonable improvement.
-ssize, storable := w.storedSize(input)
if storable && ssize <= estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
Expand Down
4 changes: 2 additions & 2 deletions zstd/enc_fast.go
Expand Up @@ -85,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
-const kSearchStrength = 7
+const kSearchStrength = 6

// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
Expand Down Expand Up @@ -334,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
-const kSearchStrength = 8
+const kSearchStrength = 6

// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
Expand Down