From b95ee962027cc661b8c8d04344d75e1896610dcb Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 7 Sep 2022 10:27:41 +0200 Subject: [PATCH] flate: Improve level 5+6 compression (#656) Skip 2 bytes when doing end-of-match search. This allows better matches to be picked up further forward, adding up to 2 literals, but giving a better match. Level 5: nyc: 779171549 -> 767765165 github-june: 943488021 -> 938777887 github-ranks: 435111686 -> 435008334 sofia: 3083510553 -> 3048677443 Level 6: nyc: 768119006 -> 753545897 Performance unchanged. --- flate/level5.go | 15 +++++++++++---- flate/level6.go | 21 +++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/flate/level5.go b/flate/level5.go index 4b97576bd3..ef6339d95f 100644 --- a/flate/level5.go +++ b/flate/level5.go @@ -191,14 +191,21 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // Try to locate a better match by checking the end of best match... if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // The sweet spot is 2 or 3 bytes, depending on the input. + // 3 is only slightly better when it helps, but sometimes a lot worse. + // The skipped bytes are tested when extending backwards, + // and are still picked up as part of the match if they match. + const skipBeginning = 2 eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset - // Test current - t2 := eLong - e.cur - l - off := s - t2 + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 if t2 >= 0 && off < maxMatchOffset && off > 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } } diff --git a/flate/level6.go b/flate/level6.go index 62888edf3c..85e4b2095f 100644 --- a/flate/level6.go +++ b/flate/level6.go @@ -213,24 +213,33 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Try to locate a better match by checking the end-of-match... 
if sAt := s + l; sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // The sweet spot is 2 or 3 bytes, depending on the input. + // 3 is only slightly better when it helps, but sometimes a lot worse. + // The skipped bytes are tested when extending backwards, + // and are still picked up as part of the match if they match. + const skipBeginning = 2 eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] // Test current - t2 := eLong.Cur.offset - e.cur - l - off := s - t2 + t2 := eLong.Cur.offset - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 if off < maxMatchOffset { if off > 0 && t2 >= 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } // Test next: - t2 = eLong.Prev.offset - e.cur - l - off := s - t2 + t2 = eLong.Prev.offset - e.cur - l + skipBeginning + off := s2 - t2 if off > 0 && off < maxMatchOffset && t2 >= 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } }