Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

flate: Improve level 3 compression #478

Merged
merged 2 commits into from Jan 28, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 24 additions & 21 deletions flate/level3.go
Expand Up @@ -5,14 +5,16 @@ import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
table [tableSize]tableEntryPrev
table [1 << 16]tableEntryPrev
}

// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 8 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
tableBits = 16
tableSize = 1 << tableBits
)

if debugDeflate && e.cur < 0 {
Expand Down Expand Up @@ -73,7 +75,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
nextHash := hash4u(cv, tableBits)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
Expand Down Expand Up @@ -156,7 +158,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(t+4) < len(src) && t > 0 {
cv := load3232(src, t)
nextHash := hash(cv)
nextHash := hash4u(cv, tableBits)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
Expand All @@ -165,30 +167,31 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
goto emitRemainder
}

// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-3 to s.
x := load6432(src, s-3)
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 3},
// Store every 5th hash in-between.
for i := s - l + 2; i < s-5; i += 5 {
nextHash := hash4u(load3232(src, i), tableBits)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}}
}
x >>= 8
prevHash = hash(uint32(x))
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s.
x := load6432(src, s-2)
prevHash := hash4u(uint32(x), tableBits)

e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
prevHash = hash(uint32(x))
prevHash = hash4u(uint32(x), tableBits)

e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
currHash := hash(uint32(x))
currHash := hash4u(uint32(x), tableBits)
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Expand All @@ -200,15 +203,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)

if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
if candidate.offset > minOffset {
if cv == load3232(src, candidate.offset-e.cur) {
// Found a match...
continue
}
candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
// Match at prev...
continue
}
}
cv = uint32(x >> 8)
Expand Down