Skip to content

Commit

Permalink
flate: Faster histograms (#620)
Browse files Browse the repository at this point in the history
Faster histogram for Huffman-only compression; 1% overall speedup.

```
github-ranks-backup.bin	gzkp	-2	1862623243	1298789681	5528	321.30
github-ranks-backup.bin	gzkp	-2	1862623243	1298789681	5471	324.63
```
  • Loading branch information
klauspost committed Jun 9, 2022
1 parent 352428d commit 03dccc3
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 24 deletions.
21 changes: 7 additions & 14 deletions flate/huffman_bit_writer.go
Expand Up @@ -1009,8 +1009,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}

// Fill is rarely better...
const fill = false
const numLiterals = endBlockMarker + 1
const numOffsets = 1

Expand All @@ -1019,7 +1017,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
histogram(input, w.literalFreq[:numLiterals], fill)
histogram(input, w.literalFreq[:numLiterals])
ssize, storable := w.storedSize(input)
if storable && len(input) > 1024 {
// Quick check for incompressible content.
Expand All @@ -1045,19 +1043,14 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
// Clear fill...
for i := range w.literalFreq[:numLiterals] {
w.literalFreq[i] = 0
}
histogram(input, w.literalFreq[:numLiterals], false)
}
estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
estBits += w.lastHeader
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
if estBits < math.MaxInt32 {
estBits += w.lastHeader
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
estBits += estBits >> w.logNewTablePenalty
}
estBits += estBits >> w.logNewTablePenalty

// Store bytes, if we don't get a reasonable improvement.
if storable && ssize <= estBits {
Expand Down
36 changes: 26 additions & 10 deletions flate/huffman_code.go
Expand Up @@ -364,21 +364,37 @@ func atLeastOne(v float32) float32 {
return v
}

// Unassigned values are assigned '1' in the histogram.
func fillHist(b []uint16) {
for i, v := range b {
if v == 0 {
b[i] = 1
// histogram counts how often each byte value occurs in b and adds those
// counts to the first 256 entries of h. Entries already present in h are
// incremented, not reset, so callers that want a fresh histogram must clear
// h beforehand.
//
// h must be re-sliceable to 256 entries; h[:256] panics otherwise.
// Counters are uint16, so a count that exceeds 65535 wraps around.
//
// Inputs of 8 KiB or more are handed to histogramSplit; shorter inputs are
// counted with a single pass over b.
func histogram(b []byte, h []uint16) {
	if len(b) >= 8<<10 {
		// Split for bigger inputs.
		histogramSplit(b, h)
		return
	}
	// A byte value is always a valid index into a 256-entry slice.
	h = h[:256]
	for _, t := range b {
		h[t]++
	}
}

func histogram(b []byte, h []uint16, fill bool) {
func histogramSplit(b []byte, h []uint16) {
// Tested, and slightly faster than 2-way.
// Writing to separate arrays and combining is also slightly slower.
h = h[:256]
for _, t := range b {
h[t]++
for len(b)&3 != 0 {
h[b[0]]++
b = b[1:]
}
if fill {
fillHist(h)
n := len(b) / 4
x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
for i, t := range x {
v0 := &h[t]
v1 := &h[y[i]]
v3 := &h[w[i]]
v2 := &h[z[i]]
*v0++
*v1++
*v2++
*v3++
}
}

0 comments on commit 03dccc3

Please sign in to comment.