Skip to content

Commit

Permalink
flate: Improve level 1-3 compression (#678)
Browse files Browse the repository at this point in the history
Use 5 byte hash instead of 4 byte hash.

This improves compression in most cases and will also yield faster decompression. Little to no impact on compression speed.

Before/after:
```
file	out	level	insize	outsize	millis	mb/s
nyc-taxi-data-10M.csv	gzkp	1	3325605752	922273214	14065	225.49
nyc-taxi-data-10M.csv	gzkp	1	3325605752	846471964	14342	221.12

nyc-taxi-data-10M.csv	gzkp	2	3325605752	883782053	15683	202.22
nyc-taxi-data-10M.csv	gzkp	2	3325605752	815766227	14865	213.35

nyc-taxi-data-10M.csv	gzkp	3	3325605752	878726683	17308	183.24
nyc-taxi-data-10M.csv	gzkp	3	3325605752	808448239	16882	187.86

nyc-taxi-data-10M.csv	gzkp	4	3325605752	789447233	20651	153.57
nyc-taxi-data-10M.csv	gzkp	4	3325605752	789447233	20657	153.53

file	out	level	insize	outsize	millis	mb/s
enwik9	gzkp	1	1000000000	382781160	5713	166.90
enwik9	gzkp	1	1000000000	374131553	5826	163.69

enwik9	gzkp	2	1000000000	371351753	6131	155.55
enwik9	gzkp	2	1000000000	361881529	5910	161.36

enwik9	gzkp	3	1000000000	364881746	6891	138.39
enwik9	gzkp	3	1000000000	355065173	6960	137.02

enwik9	gzkp	4	1000000000	342732211	8339	114.36
enwik9	gzkp	4	1000000000	342732211	8252	115.57

file	reset	out	level	files	insize	outsize	millis	mb/s
objectfiles	true	gzkp	1	708	300491980	56114777	1008	284.27
objectfiles	true	gzkp	1	708	300491980	55300071	998	286.90

objectfiles	true	gzkp	2	708	300491980	53946448	1147	249.71
objectfiles	true	gzkp	2	708	300491980	52750260	1109	258.36

objectfiles	true	gzkp	3	708	300491980	53110452	1220	234.82
objectfiles	true	gzkp	3	708	300491980	51947585	1211	236.46


One of the few regressions:

file	out	level	insize	outsize	millis	mb/s
rawstudio-mint14.tar	gzkp	1	8558382592	3960117298	36682	222.50
rawstudio-mint14.tar	gzkp	1	8558382592	3985295228	36619	222.88

rawstudio-mint14.tar	gzkp	2	8558382592	3899597850	38683	210.99
rawstudio-mint14.tar	gzkp	2	8558382592	3921716642	36754	222.06

rawstudio-mint14.tar	gzkp	3	8558382592	3848762302	46588	175.19
rawstudio-mint14.tar	gzkp	3	8558382592	3846475496	45611	178.94
```
  • Loading branch information
klauspost committed Sep 25, 2022
1 parent 3822c7c commit b8a3c61
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 106 deletions.
6 changes: 6 additions & 0 deletions flate/deflate.go
Expand Up @@ -374,6 +374,12 @@ func hash4(b []byte) uint32 {
return hash4u(binary.LittleEndian.Uint32(b), hashBits)
}

// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> (32 - h)
}

// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
Expand Down
8 changes: 4 additions & 4 deletions flate/deflate_test.go
Expand Up @@ -316,13 +316,13 @@ func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name str
}
w.Write(input)
w.Close()
if limit > 0 && buffer.Len() > limit {
t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
return
}
if limit > 0 {
t.Logf("level: %d - Size:%.2f%%, %d b\n", level, float64(buffer.Len()*100)/float64(limit), buffer.Len())
}
if limit > 0 && buffer.Len() > limit {
t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
}

r := NewReader(&buffer)
out, err := io.ReadAll(r)
if err != nil {
Expand Down
56 changes: 19 additions & 37 deletions flate/fast_encoder.go
Expand Up @@ -58,17 +58,6 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)

// load32 reads a little-endian uint32 from b starting at index i.
func load32(b []byte, i int) uint32 {
	// Reslice to exactly four bytes up front; this is intended to help the
	// compiler drop the per-byte bounds checks and perform a single read.
	w := b[i:][:4]
	return uint32(w[3])<<24 | uint32(w[2])<<16 | uint32(w[1])<<8 | uint32(w[0])
}

// load64 reads a little-endian uint64 from b starting at index i.
func load64(b []byte, i int) uint64 {
	tail := b[i:]
	return binary.LittleEndian.Uint64(tail)
}

// load3232 reads a little-endian uint32 from b starting at the int32 index i.
func load3232(b []byte, i int32) uint32 {
	tail := b[i:]
	return binary.LittleEndian.Uint32(tail)
}
Expand All @@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}

// hash maps u to a hash table index by multiplying it with a fixed odd
// constant and shifting the 32-bit product right by tableShift.
func hash(u uint32) uint32 {
	const multiplier = 0x1e35a7bd
	product := u * multiplier
	return product >> tableShift
}

// tableEntry is one slot of an encoder hash table.
type tableEntry struct {
// offset is written by the encoders as a position in src plus e.cur.
offset int32
}
Expand Down Expand Up @@ -115,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
return s
}

// hash4u hashes the 32-bit value u down to h bits by multiplying with
// prime4bytes and keeping the top h bits of the 32-bit product.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
	product := u * prime4bytes
	return product >> (32 - h)
}

// tableEntryPrev pairs two tableEntry values, Cur and Prev.
// NOTE(review): presumably the newest and the previous entry stored for one
// hash bucket — confirm against the encoders that use this type.
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}

// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
}

// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}

// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
}

// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
// hashLen returns a hash of the lowest mls bytes of with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return uint32((u * prime8bytes) >> (64 - length))
default:
return (uint32(u) * prime4bytes) >> (32 - length)
}
}

// matchlen will return the match length between offsets and t in src.
Expand Down
27 changes: 14 additions & 13 deletions flate/level1.go
Expand Up @@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
Expand Down Expand Up @@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)

// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
cv := load6432(src, s)

for {
const skipLog = 5
Expand All @@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
Expand All @@ -86,28 +87,28 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {

now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash(uint32(now))
nextHash = hashLen(now, tableBits, hashBytes)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

// Do one right away...
cv = uint32(now)
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = uint32(now)
cv = now
s = nextS
}

Expand Down Expand Up @@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur}
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
Expand All @@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hash(uint32(x))
currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 8)
cv = x >> 8
s++
break
}
Expand Down
35 changes: 18 additions & 17 deletions flate/level2.go
Expand Up @@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)

if debugDeflate && e.cur < 0 {
Expand Down Expand Up @@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)

// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
Expand All @@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
nextHash := hash4u(cv, bTableBits)
nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
Expand All @@ -84,27 +85,27 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash4u(uint32(now), bTableBits)
nextHash = hashLen(now, bTableBits, hashBytes)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

// Do one right away...
cv = uint32(now)
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
cv = uint32(now)
cv = now
}

// A 4-byte match has been found. We'll later see if more than 4 bytes
Expand Down Expand Up @@ -154,25 +155,25 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {

if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}

// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i)
nextHash := hash4u(uint32(x), bTableBits)
nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}

Expand All @@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
prevHash := hashLen(x, bTableBits, hashBytes)
prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hash4u(uint32(x>>16), bTableBits)
currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 24)
cv = x >> 24
s++
break
}
Expand Down

0 comments on commit b8a3c61

Please sign in to comment.