Skip to content

Commit

Permalink
zstd: Write table clearing in a way that the compiler recognizes
Browse files Browse the repository at this point in the history
Benchmark results on amd64 are shown below. They do not take klauspost#701 into account.
They were measured with Go 1.19; Go 1.20 generates slightly better assembly for the old
code, but still generates terrible assembly on 32-bit platforms.

See also golang/go#56954.

name                                 old speed      new speed       delta
Encoder_EncodeAllXML-8                283MB/s ± 1%    284MB/s ± 0%     ~     (p=0.026 n=30+20)
Encoder_EncodeAllSimple/fastest-8     111MB/s ± 0%    111MB/s ± 1%     ~     (p=0.011 n=28+20)
Encoder_EncodeAllSimple/default-8    78.4MB/s ± 1%   78.3MB/s ± 1%     ~     (p=0.572 n=30+19)
Encoder_EncodeAllSimple/better-8     65.9MB/s ± 1%   66.2MB/s ± 1%   +0.53%  (p=0.009 n=30+20)
Encoder_EncodeAllSimple/best-8       11.1MB/s ± 1%   11.6MB/s ± 3%   +4.42%  (p=0.000 n=27+28)
Encoder_EncodeAllSimple4K/fastest-8   911MB/s ± 1%    914MB/s ± 1%   +0.31%  (p=0.004 n=29+20)
Encoder_EncodeAllSimple4K/default-8  73.1MB/s ± 1%   73.6MB/s ± 1%   +0.67%  (p=0.000 n=29+20)
Encoder_EncodeAllSimple4K/better-8   60.5MB/s ± 1%   62.7MB/s ± 1%   +3.64%  (p=0.000 n=29+17)
Encoder_EncodeAllSimple4K/best-8     8.62MB/s ± 3%  10.11MB/s ± 1%  +17.24%  (p=0.000 n=30+27)
Encoder_EncodeAllHTML-8               133MB/s ± 1%    133MB/s ± 1%     ~     (p=0.101 n=30+19)
Encoder_EncodeAllTwain-8             84.8MB/s ± 1%   86.2MB/s ± 3%   +1.63%  (p=0.000 n=24+20)
Encoder_EncodeAllPi-8                62.6MB/s ± 1%   62.7MB/s ± 0%     ~     (p=0.102 n=30+20)
Random4KEncodeAllFastest-8           2.50GB/s ± 1%   2.50GB/s ± 1%     ~     (p=0.449 n=29+20)
Random10MBEncodeAllFastest-8         2.39GB/s ± 2%   2.52GB/s ± 6%   +5.23%  (p=0.000 n=27+20)

name                                 old alloc/op   new alloc/op    delta
Encoder_EncodeAllXML-8                  0.00B           0.00B          ~     (all equal)
Encoder_EncodeAllSimple/fastest-8       2.73B ±27%      3.00B ± 0%     ~     (p=0.018 n=30+18)
Encoder_EncodeAllSimple/default-8       4.00B ± 0%      4.00B ± 0%     ~     (all equal)
Encoder_EncodeAllSimple/better-8        5.00B ± 0%      5.00B ± 0%     ~     (all equal)
Encoder_EncodeAllSimple/best-8          19.5B ± 3%      19.0B ± 0%   -2.40%  (p=0.000 n=30+24)
Encoder_EncodeAllSimple4K/fastest-8     0.00B           0.00B          ~     (all equal)
Encoder_EncodeAllSimple4K/default-8     0.00B           0.00B          ~     (all equal)
Encoder_EncodeAllSimple4K/better-8      0.00B           0.00B          ~     (all equal)
Encoder_EncodeAllSimple4K/best-8        2.00B ± 0%      1.43B ±40%  -28.33%  (p=0.000 n=30+30)
Encoder_EncodeAllHTML-8                 2.37B ±27%      2.25B ±33%     ~     (p=0.398 n=30+20)
Encoder_EncodeAllTwain-8                0.00B           0.00B          ~     (all equal)
Encoder_EncodeAllPi-8                   12.4B ± 5%      12.2B ± 6%     ~     (p=0.283 n=30+20)
Random4KEncodeAllFastest-8              0.00B           0.00B          ~     (all equal)
Random10MBEncodeAllFastest-8           31.9kB ± 2%     30.5kB ± 9%   -4.27%  (p=0.002 n=28+20)
  • Loading branch information
greatroar committed Nov 29, 2022
1 parent 6efddf2 commit 594c1ea
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 21 deletions.
8 changes: 2 additions & 6 deletions zstd/enc_best.go
Expand Up @@ -87,12 +87,8 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = prevEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [bestShortTableSize]prevEntry{}
+ e.longTable = [bestLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
Expand Down
8 changes: 2 additions & 6 deletions zstd/enc_better.go
Expand Up @@ -64,12 +64,8 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [betterShortTableSize]tableEntry{}
+ e.longTable = [betterLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
Expand Down
8 changes: 2 additions & 6 deletions zstd/enc_dfast.go
Expand Up @@ -46,12 +46,8 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = tableEntry{}
- }
+ e.table = [dFastShortTableSize]tableEntry{}
+ e.longTable = [dFastLongTableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
Expand Down
4 changes: 1 addition & 3 deletions zstd/enc_fast.go
Expand Up @@ -557,9 +557,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
+ e.table = [tableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
Expand Down

0 comments on commit 594c1ea

Please sign in to comment.