Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

flate: Improve level 1-3 compression #678

Merged
merged 1 commit into from Sep 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions flate/deflate.go
Expand Up @@ -374,6 +374,12 @@ func hash4(b []byte) uint32 {
return hash4u(binary.LittleEndian.Uint32(b), hashBits)
}

// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> (32 - h)
}

// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
Expand Down
8 changes: 4 additions & 4 deletions flate/deflate_test.go
Expand Up @@ -316,13 +316,13 @@ func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name str
}
w.Write(input)
w.Close()
if limit > 0 && buffer.Len() > limit {
t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
return
}
if limit > 0 {
t.Logf("level: %d - Size:%.2f%%, %d b\n", level, float64(buffer.Len()*100)/float64(limit), buffer.Len())
}
if limit > 0 && buffer.Len() > limit {
t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
}

r := NewReader(&buffer)
out, err := io.ReadAll(r)
if err != nil {
Expand Down
56 changes: 19 additions & 37 deletions flate/fast_encoder.go
Expand Up @@ -58,17 +58,6 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)

func load32(b []byte, i int) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}

func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}
Expand All @@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}

func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}

type tableEntry struct {
offset int32
}
Expand Down Expand Up @@ -115,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
return s
}

// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> (32 - h)
}

type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}

// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
}

// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}

// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
}

// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
// hashLen returns a hash of the lowest mls bytes of with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return uint32((u * prime8bytes) >> (64 - length))
default:
return (uint32(u) * prime4bytes) >> (32 - length)
}
}

// matchlen will return the match length between offsets and t in src.
Expand Down
27 changes: 14 additions & 13 deletions flate/level1.go
Expand Up @@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
Expand Down Expand Up @@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)

// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
cv := load6432(src, s)

for {
const skipLog = 5
Expand All @@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
Expand All @@ -86,28 +87,28 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {

now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash(uint32(now))
nextHash = hashLen(now, tableBits, hashBytes)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

// Do one right away...
cv = uint32(now)
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = uint32(now)
cv = now
s = nextS
}

Expand Down Expand Up @@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur}
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
Expand All @@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hash(uint32(x))
currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 8)
cv = x >> 8
s++
break
}
Expand Down
35 changes: 18 additions & 17 deletions flate/level2.go
Expand Up @@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)

if debugDeflate && e.cur < 0 {
Expand Down Expand Up @@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)

// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
Expand All @@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
nextHash := hash4u(cv, bTableBits)
nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
Expand All @@ -84,27 +85,27 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash4u(uint32(now), bTableBits)
nextHash = hashLen(now, bTableBits, hashBytes)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

// Do one right away...
cv = uint32(now)
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
cv = uint32(now)
cv = now
}

// A 4-byte match has been found. We'll later see if more than 4 bytes
Expand Down Expand Up @@ -154,25 +155,25 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {

if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}

// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i)
nextHash := hash4u(uint32(x), bTableBits)
nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}

Expand All @@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
prevHash := hashLen(x, bTableBits, hashBytes)
prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hash4u(uint32(x>>16), bTableBits)
currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 24)
cv = x >> 24
s++
break
}
Expand Down