From a1300314ac52e959f3c28155dc5d2d117ccb6607 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Sat, 26 Nov 2022 13:51:00 +0100 Subject: [PATCH] zstd: Rewrite matchLen to make it inlineable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit match.matchlen is also inlineable. name old speed new speed delta Encoder_EncodeAllXML-8 284MB/s ± 1% 283MB/s ± 1% -0.28% (p=0.004 n=19+20) Encoder_EncodeAllSimple/fastest-8 111MB/s ± 0% 112MB/s ± 1% +0.95% (p=0.000 n=17+19) Encoder_EncodeAllSimple/default-8 78.2MB/s ± 1% 77.8MB/s ± 0% -0.47% (p=0.000 n=20+19) Encoder_EncodeAllSimple/better-8 65.6MB/s ± 1% 65.7MB/s ± 1% ~ (p=0.189 n=20+20) Encoder_EncodeAllSimple/best-8 11.1MB/s ± 2% 11.8MB/s ± 0% +6.19% (p=0.000 n=18+16) Encoder_EncodeAllSimple4K/fastest-8 912MB/s ± 0% 912MB/s ± 1% ~ (p=0.815 n=18+18) Encoder_EncodeAllSimple4K/default-8 72.9MB/s ± 1% 74.1MB/s ± 1% +1.68% (p=0.000 n=20+17) Encoder_EncodeAllSimple4K/better-8 60.5MB/s ± 1% 60.5MB/s ± 1% ~ (p=0.767 n=20+18) Encoder_EncodeAllSimple4K/best-8 8.53MB/s ± 2% 8.84MB/s ± 1% +3.59% (p=0.000 n=20+20) Encoder_EncodeAllHTML-8 133MB/s ± 1% 132MB/s ± 1% -0.62% (p=0.000 n=20+20) Encoder_EncodeAllTwain-8 84.8MB/s ± 1% 86.1MB/s ± 1% +1.51% (p=0.000 n=20+15) Encoder_EncodeAllPi-8 62.6MB/s ± 1% 63.2MB/s ± 1% +1.00% (p=0.000 n=20+19) Random4KEncodeAllFastest-8 2.50GB/s ± 1% 2.52GB/s ± 0% +0.72% (p=0.000 n=20+19) Random10MBEncodeAllFastest-8 2.39GB/s ± 1% 2.48GB/s ± 5% ~ (p=0.121 n=20+20) name old alloc/op new alloc/op delta Encoder_EncodeAllXML-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple/fastest-8 2.75B ±27% 3.00B ± 0% ~ (p=0.062 n=20+18) Encoder_EncodeAllSimple/default-8 4.00B ± 0% 4.00B ± 0% ~ (all equal) Encoder_EncodeAllSimple/better-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) Encoder_EncodeAllSimple/best-8 19.3B ± 4% 18.0B ± 0% -6.74% (p=0.000 n=20+16) Encoder_EncodeAllSimple4K/fastest-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/default-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/better-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/best-8 2.00B ± 0% 2.00B ± 0% ~ (all equal) Encoder_EncodeAllHTML-8 2.45B ±22% 2.50B ±20% ~ (p=1.000 n=20+20) Encoder_EncodeAllTwain-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllPi-8 12.4B ± 5% 12.0B ± 0% -3.23% (p=0.002 n=20+18) Random4KEncodeAllFastest-8 0.00B 0.00B ~ (all equal) Random10MBEncodeAllFastest-8 32.0kB ± 2% 30.9kB ± 6% ~ (p=0.114 n=20+20) --- zstd/enc_base.go | 19 +------------------ zstd/zstd.go | 27 +++++++++++---------------- zstd/zstd_test.go | 21 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/zstd/enc_base.go b/zstd/enc_base.go index 15ae8ee807..2760308d0b 100644 --- a/zstd/enc_base.go +++ b/zstd/enc_base.go @@ -126,24 +126,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) } } - a := src[s:] - b := src[t:] - b = b[:len(a)] - end := int32((len(a) >> 3) << 3) - for i := int32(0); i < end; i += 8 { - if diff := load6432(a, i) ^ load6432(b, i); diff != 0 { - return i + int32(bits.TrailingZeros64(diff)>>3) - } - } - - a = a[end:] - b = b[end:] - for i := range a { - if a[i] != b[i] { - return int32(i) + end - } - } - return int32(len(a)) + end + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. diff --git a/zstd/zstd.go b/zstd/zstd.go index 3eb3f1c826..34b3cfdb08 100644 --- a/zstd/zstd.go +++ b/zstd/zstd.go @@ -110,26 +110,25 @@ func printf(format string, a ...interface{}) { } } -// matchLen returns the maximum length. +// matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. -// The function also returns whether all bytes matched. -func matchLen(a, b []byte) int { - b = b[:len(a)] - for i := 0; i < len(a)-7; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - return i + (bits.TrailingZeros64(diff) >> 3) +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 } + n += 8 } - checked := (len(a) >> 3) << 3 - a = a[checked:] - b = b[checked:] for i := range a { if a[i] != b[i] { - return i + checked + break } + n++ } - return len(a) + checked + return n + } func load3232(b []byte, i int32) uint32 { @@ -140,10 +139,6 @@ func load6432(b []byte, i int32) uint64 { return binary.LittleEndian.Uint64(b[i:]) } -func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - type byter interface { Bytes() []byte Len() int diff --git a/zstd/zstd_test.go b/zstd/zstd_test.go index 0278d49c45..fd1d3168e5 100644 --- a/zstd/zstd_test.go +++ b/zstd/zstd_test.go @@ -31,3 +31,24 @@ func TestMain(m *testing.M) { } os.Exit(ec) } + +func TestMatchLen(t *testing.T) { + a := make([]byte, 130) + for i := range a { + a[i] = byte(i) + } + b := append([]byte{}, a...) + + check := func(x, y []byte, l int) { + if m := matchLen(x, y); m != l { + t.Error("expected", l, "got", m) + } + } + + for l := range a { + a[l] = ^a[l] + check(a, b, l) + check(a[:l], b, l) + a[l] = ^a[l] + } +}