Skip to content

Commit

Permalink
zstd: Rewrite matchLen to make it inlineable (#701)
Browse files Browse the repository at this point in the history
fastBase.matchlen is also inlineable.

name                                 old speed      new speed      delta
Encoder_EncodeAllXML-8                284MB/s ± 1%   283MB/s ± 1%  -0.28%  (p=0.004 n=19+20)
Encoder_EncodeAllSimple/fastest-8     111MB/s ± 0%   112MB/s ± 1%  +0.95%  (p=0.000 n=17+19)
Encoder_EncodeAllSimple/default-8    78.2MB/s ± 1%  77.8MB/s ± 0%  -0.47%  (p=0.000 n=20+19)
Encoder_EncodeAllSimple/better-8     65.6MB/s ± 1%  65.7MB/s ± 1%    ~     (p=0.189 n=20+20)
Encoder_EncodeAllSimple/best-8       11.1MB/s ± 2%  11.8MB/s ± 0%  +6.19%  (p=0.000 n=18+16)
Encoder_EncodeAllSimple4K/fastest-8   912MB/s ± 0%   912MB/s ± 1%    ~     (p=0.815 n=18+18)
Encoder_EncodeAllSimple4K/default-8  72.9MB/s ± 1%  74.1MB/s ± 1%  +1.68%  (p=0.000 n=20+17)
Encoder_EncodeAllSimple4K/better-8   60.5MB/s ± 1%  60.5MB/s ± 1%    ~     (p=0.767 n=20+18)
Encoder_EncodeAllSimple4K/best-8     8.53MB/s ± 2%  8.84MB/s ± 1%  +3.59%  (p=0.000 n=20+20)
Encoder_EncodeAllHTML-8               133MB/s ± 1%   132MB/s ± 1%  -0.62%  (p=0.000 n=20+20)
Encoder_EncodeAllTwain-8             84.8MB/s ± 1%  86.1MB/s ± 1%  +1.51%  (p=0.000 n=20+15)
Encoder_EncodeAllPi-8                62.6MB/s ± 1%  63.2MB/s ± 1%  +1.00%  (p=0.000 n=20+19)
Random4KEncodeAllFastest-8           2.50GB/s ± 1%  2.52GB/s ± 0%  +0.72%  (p=0.000 n=20+19)
Random10MBEncodeAllFastest-8         2.39GB/s ± 1%  2.48GB/s ± 5%    ~     (p=0.121 n=20+20)

name                                 old alloc/op   new alloc/op   delta
Encoder_EncodeAllXML-8                  0.00B          0.00B         ~     (all equal)
Encoder_EncodeAllSimple/fastest-8       2.75B ±27%     3.00B ± 0%    ~     (p=0.062 n=20+18)
Encoder_EncodeAllSimple/default-8       4.00B ± 0%     4.00B ± 0%    ~     (all equal)
Encoder_EncodeAllSimple/better-8        5.00B ± 0%     5.00B ± 0%    ~     (all equal)
Encoder_EncodeAllSimple/best-8          19.3B ± 4%     18.0B ± 0%  -6.74%  (p=0.000 n=20+16)
Encoder_EncodeAllSimple4K/fastest-8     0.00B          0.00B         ~     (all equal)
Encoder_EncodeAllSimple4K/default-8     0.00B          0.00B         ~     (all equal)
Encoder_EncodeAllSimple4K/better-8      0.00B          0.00B         ~     (all equal)
Encoder_EncodeAllSimple4K/best-8        2.00B ± 0%     2.00B ± 0%    ~     (all equal)
Encoder_EncodeAllHTML-8                 2.45B ±22%     2.50B ±20%    ~     (p=1.000 n=20+20)
Encoder_EncodeAllTwain-8                0.00B          0.00B         ~     (all equal)
Encoder_EncodeAllPi-8                   12.4B ± 5%     12.0B ± 0%  -3.23%  (p=0.002 n=20+18)
Random4KEncodeAllFastest-8              0.00B          0.00B         ~     (all equal)
Random10MBEncodeAllFastest-8           32.0kB ± 2%    30.9kB ± 6%    ~     (p=0.114 n=20+20)
  • Loading branch information
greatroar committed Nov 29, 2022
1 parent 6efddf2 commit b7c48cb
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 34 deletions.
19 changes: 1 addition & 18 deletions zstd/enc_base.go
Expand Up @@ -126,24 +126,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
a := src[s:]
b := src[t:]
b = b[:len(a)]
end := int32((len(a) >> 3) << 3)
for i := int32(0); i < end; i += 8 {
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
return i + int32(bits.TrailingZeros64(diff)>>3)
}
}

a = a[end:]
b = b[end:]
for i := range a {
if a[i] != b[i] {
return int32(i) + end
}
}
return int32(len(a)) + end
return int32(matchLen(src[s:], src[t:]))
}

// Reset the encoding table.
Expand Down
27 changes: 11 additions & 16 deletions zstd/zstd.go
Expand Up @@ -110,26 +110,25 @@ func printf(format string, a ...interface{}) {
}
}

// matchLen returns the length, in bytes, of the longest common prefix of a
// and b.
//
// a must be the shorter of the two slices (or the same length as b). The tail
// loop below ranges over a while indexing b at the same positions, so a b that
// runs out while it still matches a is indexed out of range and panics.
func matchLen(a, b []byte) (n int) {
	// Compare eight bytes at a time while both slices still hold a full word.
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			// The words are loaded little-endian, so the lowest set bit of
			// the XOR lies in the first differing byte; >>3 turns that bit
			// index into a byte count.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}

	// Fewer than eight bytes remain in at least one slice: finish bytewise.
	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func load3232(b []byte, i int32) uint32 {
Expand All @@ -140,10 +139,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}

// load64 decodes the eight bytes of b that start at offset i as a
// little-endian uint64.
func load64(b []byte, i int) uint64 {
	tail := b[i:]
	return binary.LittleEndian.Uint64(tail)
}

type byter interface {
Bytes() []byte
Len() int
Expand Down
21 changes: 21 additions & 0 deletions zstd/zstd_test.go
Expand Up @@ -31,3 +31,24 @@ func TestMain(m *testing.M) {
}
os.Exit(ec)
}

// TestMatchLen runs matchLen on two 130-byte buffers that differ in exactly
// one byte, moving the differing position across every index. For each
// position it checks both the full mutated buffer and the mutated buffer
// truncated just before the differing byte; in both cases the expected result
// is the position itself.
func TestMatchLen(t *testing.T) {
	const size = 130

	ref := make([]byte, size)
	for i := 0; i < size; i++ {
		ref[i] = byte(i)
	}
	mut := make([]byte, size)
	copy(mut, ref)

	for pos := 0; pos < size; pos++ {
		// Flip every bit of one byte so the buffers diverge exactly at pos.
		mut[pos] = ^mut[pos]
		for _, x := range [][]byte{mut, mut[:pos]} {
			if got := matchLen(x, ref); got != pos {
				t.Error("expected", pos, "got", got)
			}
		}
		// Restore the byte so the next iteration starts from equal buffers.
		mut[pos] = ^mut[pos]
	}
}

0 comments on commit b7c48cb

Please sign in to comment.