From b15768adc042aa9efde789aaa0866f609a40ca6a Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Mon, 28 Nov 2022 18:59:31 +0100 Subject: [PATCH] zstd: Write table clearing in a way that the compiler recognizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark results on amd64 below. These do not take into account #701. They're for Go 1.19; Go 1.20 produces slightly better asm for the old code, but still produces terrible asm on 32-bit platforms. See also golang/go#56954. name old speed new speed delta Encoder_EncodeAllXML-8 283MB/s ± 1% 284MB/s ± 0% ~ (p=0.026 n=30+20) Encoder_EncodeAllSimple/fastest-8 111MB/s ± 0% 111MB/s ± 1% ~ (p=0.011 n=28+20) Encoder_EncodeAllSimple/default-8 78.4MB/s ± 1% 78.3MB/s ± 1% ~ (p=0.572 n=30+19) Encoder_EncodeAllSimple/better-8 65.9MB/s ± 1% 66.2MB/s ± 1% +0.53% (p=0.009 n=30+20) Encoder_EncodeAllSimple/best-8 11.1MB/s ± 1% 11.6MB/s ± 3% +4.42% (p=0.000 n=27+28) Encoder_EncodeAllSimple4K/fastest-8 911MB/s ± 1% 914MB/s ± 1% +0.31% (p=0.004 n=29+20) Encoder_EncodeAllSimple4K/default-8 73.1MB/s ± 1% 73.6MB/s ± 1% +0.67% (p=0.000 n=29+20) Encoder_EncodeAllSimple4K/better-8 60.5MB/s ± 1% 62.7MB/s ± 1% +3.64% (p=0.000 n=29+17) Encoder_EncodeAllSimple4K/best-8 8.62MB/s ± 3% 10.11MB/s ± 1% +17.24% (p=0.000 n=30+27) Encoder_EncodeAllHTML-8 133MB/s ± 1% 133MB/s ± 1% ~ (p=0.101 n=30+19) Encoder_EncodeAllTwain-8 84.8MB/s ± 1% 86.2MB/s ± 3% +1.63% (p=0.000 n=24+20) Encoder_EncodeAllPi-8 62.6MB/s ± 1% 62.7MB/s ± 0% ~ (p=0.102 n=30+20) Random4KEncodeAllFastest-8 2.50GB/s ± 1% 2.50GB/s ± 1% ~ (p=0.449 n=29+20) Random10MBEncodeAllFastest-8 2.39GB/s ± 2% 2.52GB/s ± 6% +5.23% (p=0.000 n=27+20) name old alloc/op new alloc/op delta Encoder_EncodeAllXML-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple/fastest-8 2.73B ±27% 3.00B ± 0% ~ (p=0.018 n=30+18) Encoder_EncodeAllSimple/default-8 4.00B ± 0% 4.00B ± 0% ~ (all equal) Encoder_EncodeAllSimple/better-8 5.00B ± 0% 5.00B ± 0% ~ (all equal) Encoder_EncodeAllSimple/best-8 19.5B ± 3% 19.0B ± 0% -2.40% (p=0.000 n=30+24) Encoder_EncodeAllSimple4K/fastest-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/default-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/better-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllSimple4K/best-8 2.00B ± 0% 1.43B ±40% -28.33% (p=0.000 n=30+30) Encoder_EncodeAllHTML-8 2.37B ±27% 2.25B ±33% ~ (p=0.398 n=30+20) Encoder_EncodeAllTwain-8 0.00B 0.00B ~ (all equal) Encoder_EncodeAllPi-8 12.4B ± 5% 12.2B ± 6% ~ (p=0.283 n=30+20) Random4KEncodeAllFastest-8 0.00B 0.00B ~ (all equal) Random10MBEncodeAllFastest-8 31.9kB ± 2% 30.5kB ± 9% -4.27% (p=0.002 n=28+20) --- zstd/enc_best.go | 8 ++------ zstd/enc_better.go | 8 ++------ zstd/enc_dfast.go | 8 ++------ zstd/enc_fast.go | 4 +--- 4 files changed, 7 insertions(+), 21 deletions(-) diff --git a/zstd/enc_best.go b/zstd/enc_best.go index b1c3db8f7b..0d1a60388f 100644 --- a/zstd/enc_best.go +++ b/zstd/enc_best.go @@ -87,12 +87,8 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = prevEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [bestShortTableSize]prevEntry{} + e.longTable = [bestLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } diff --git a/zstd/enc_better.go b/zstd/enc_better.go index d70e3fd3d3..0a8518b3cb 100644 --- a/zstd/enc_better.go +++ b/zstd/enc_better.go @@ -64,12 +64,8 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [betterShortTableSize]tableEntry{} + e.longTable = [betterLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } diff --git a/zstd/enc_dfast.go b/zstd/enc_dfast.go index 1f4a9a2455..f25db686e4 100644 --- a/zstd/enc_dfast.go +++ b/zstd/enc_dfast.go @@ -46,12 +46,8 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = tableEntry{} - } + e.table = [dFastShortTableSize]tableEntry{} + e.longTable = [dFastLongTableSize]tableEntry{} e.cur = e.maxMatchOff break } diff --git a/zstd/enc_fast.go b/zstd/enc_fast.go index 202636db05..b8aaa00a88 100644 --- a/zstd/enc_fast.go +++ b/zstd/enc_fast.go @@ -557,9 +557,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } + e.table = [tableSize]tableEntry{} e.cur = e.maxMatchOff break }