From 55f4905a6e89b407e18c4f08a72c3493a6eb29b4 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Wed, 23 Nov 2022 20:13:55 +0100 Subject: [PATCH] zstd: Inline blockDec.checkCRC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit checkCRC is now a uint32 with a boolean indicating whether it needs checking. This makes the comparison shorter and saves some allocations. Benchmark results on older i7: name old speed new speed delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 458MB/s ± 0% 458MB/s ± 0% ~ (p=0.139 n=9+8) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 568MB/s ± 2% 566MB/s ± 4% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 1.19GB/s ± 0% 1.20GB/s ± 1% ~ (p=0.051 n=9+10) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 832MB/s ± 2% 839MB/s ± 2% ~ (p=0.156 n=10+9) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 352MB/s ± 0% 352MB/s ± 0% ~ (p=0.148 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 534MB/s ± 7% 542MB/s ± 3% ~ (p=0.315 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 422MB/s ± 0% 422MB/s ± 0% ~ (p=0.408 n=8+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 606MB/s ± 9% 618MB/s ± 7% ~ (p=0.393 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 362MB/s ± 5% 367MB/s ± 0% +1.44% (p=0.026 n=9+10) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 449MB/s ± 3% 463MB/s ± 4% +3.28% (p=0.003 n=10+9) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 361MB/s ± 0% 361MB/s ± 0% ~ (p=0.888 n=8+9) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 569MB/s ± 8% 577MB/s ± 2% ~ (p=0.541 n=9+8) Decoder_DecoderSmall/html_x_4.zst/buffered-8 2.46GB/s ± 1% 2.46GB/s ± 0% ~ (p=0.720 n=10+9) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 1.67GB/s ± 6% 1.65GB/s ± 4% ~ (p=0.315 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 3.87GB/s ± 0% 3.87GB/s ± 0% ~ (p=0.497 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 1.84GB/s ± 7% 1.83GB/s ± 2% ~ (p=0.912 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 8.63GB/s ± 1% 8.61GB/s ± 0% ~ (p=0.063 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 3.34GB/s ± 3% 3.32GB/s ± 5% ~ (p=0.579 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 586MB/s ± 1% 584MB/s ± 0% -0.46% (p=0.005 n=9+7) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 877MB/s ± 2% 879MB/s ± 2% ~ (p=0.796 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 961MB/s ± 0% 963MB/s ± 0% +0.19% (p=0.004 n=9+9) Decoder_DecoderSmall/html.zst/unbuffered-8 709MB/s ± 1% 715MB/s ± 3% ~ (p=0.203 n=8+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 407MB/s ± 0% 405MB/s ± 0% -0.47% (p=0.000 n=10+8) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 152MB/s ± 3% 151MB/s ± 4% ~ (p=0.853 n=10+10) name old alloc/op new alloc/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 54.0B ± 0% 54.0B ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 4.57kB ± 3% 4.57kB ± 3% ~ (p=0.985 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 4.47kB ± 0% 4.47kB ± 1% ~ (p=0.481 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 68.6B ± 2% 68.0B ± 0% ~ (p=0.173 n=10+9) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 4.64kB ± 5% 4.62kB ± 4% ~ (p=1.000 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 63.0B ± 0% 63.0B ± 0% ~ (all equal) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 4.65kB ± 3% 4.56kB ± 3% -1.94% (p=0.014 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 53.0B ± 0% 53.0B ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 4.49kB ± 1% 4.45kB ± 2% -0.93% (p=0.006 n=9+9) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 77.9B ± 1% 77.5B ± 1% ~ (p=0.176 n=9+8) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 4.52kB ± 2% 4.48kB ± 1% -0.94% (p=0.003 n=10+9) Decoder_DecoderSmall/html_x_4.zst/buffered-8 53.0B ± 6% 56.0B ± 0% ~ (p=0.059 n=10+8) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 4.46kB ± 0% 4.48kB ± 2% ~ (p=1.000 n=8+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 4.46kB ± 0% 4.43kB ± 0% -0.72% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 4.07kB ± 0% 4.04kB ± 0% -0.79% (p=0.000 n=9+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 66.0B ± 0% 66.0B ± 0% ~ (all equal) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 4.60kB ± 3% 4.53kB ± 5% ~ (p=0.052 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-8 4.47kB ± 0% 4.46kB ± 1% ~ (p=0.165 n=8+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 4.44kB ± 0% 4.44kB ± 0% ~ (p=0.171 n=10+10) name old allocs/op new allocs/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/html_x_4.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/html_x_4.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8 32.0 ± 0% 24.0 ± 0% -25.00% (p=0.000 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/urls.10K.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/html.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-8 40.0 ± 0% 32.0 ± 0% -20.00% (p=0.000 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8 32.0 ± 0% 32.0 ± 0% ~ (all equal) And on a newer i5: name old speed new speed delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16 730MB/s ± 2% 729MB/s ± 2% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16 1.16GB/s ± 5% 1.17GB/s ± 5% ~ (p=0.720 n=10+9) Decoder_DecoderSmall/geo.protodata.zst/buffered-16 2.45GB/s ± 2% 2.45GB/s ± 4% ~ (p=0.853 n=10+10) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16 2.47GB/s ± 1% 2.47GB/s ± 1% ~ (p=0.631 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16 520MB/s ± 1% 528MB/s ± 2% +1.58% (p=0.015 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16 1.15GB/s ±12% 1.18GB/s ± 5% ~ (p=0.393 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-16 615MB/s ± 2% 627MB/s ± 2% +1.80% (p=0.001 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16 1.19GB/s ± 4% 1.20GB/s ± 5% ~ (p=0.684 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16 565MB/s ± 2% 572MB/s ± 2% ~ (p=0.075 n=10+10) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16 1.10GB/s ±10% 1.11GB/s ± 9% ~ (p=0.481 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-16 560MB/s ± 1% 567MB/s ± 2% +1.22% (p=0.008 n=10+9) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16 928MB/s ± 3% 919MB/s ± 3% ~ (p=0.218 n=10+10) Decoder_DecoderSmall/html_x_4.zst/buffered-16 4.12GB/s ± 2% 4.19GB/s ± 2% +1.75% (p=0.008 n=9+10) Decoder_DecoderSmall/html_x_4.zst/unbuffered-16 4.79GB/s ± 2% 4.77GB/s ± 2% ~ (p=0.542 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16 6.02GB/s ± 2% 5.96GB/s ± 2% ~ (p=0.123 n=10+10) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16 5.78GB/s ± 1% 5.81GB/s ± 1% ~ (p=0.123 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16 11.5GB/s ± 2% 11.3GB/s ± 2% -1.77% (p=0.005 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16 9.83GB/s ± 1% 9.80GB/s ± 1% ~ (p=0.353 n=10+10) Decoder_DecoderSmall/urls.10K.zst/buffered-16 845MB/s ± 1% 845MB/s ± 2% ~ (p=0.796 n=10+10) Decoder_DecoderSmall/urls.10K.zst/unbuffered-16 1.46GB/s ± 2% 1.46GB/s ± 3% ~ (p=0.971 n=10+10) Decoder_DecoderSmall/html.zst/buffered-16 1.80GB/s ± 2% 1.84GB/s ± 2% +2.11% (p=0.006 n=9+8) Decoder_DecoderSmall/html.zst/unbuffered-16 2.16GB/s ± 1% 2.18GB/s ± 1% +0.97% (p=0.019 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/buffered-16 702MB/s ± 3% 708MB/s ± 1% ~ (p=0.165 n=10+10) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16 609MB/s ± 1% 608MB/s ± 1% ~ (p=0.150 n=9+10) name old alloc/op new alloc/op delta Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16 55.0B ± 0% 55.0B ± 0% ~ (all equal) Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16 4.51kB ± 1% 4.47kB ± 1% -0.93% (p=0.001 n=10+9) Decoder_DecoderSmall/geo.protodata.zst/buffered-16 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16 4.48kB ± 1% 4.44kB ± 0% -0.87% (p=0.000 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16 74.5B ± 1% 74.2B ± 2% ~ (p=0.450 n=10+10) Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16 4.56kB ± 2% 4.53kB ± 2% ~ (p=0.306 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/buffered-16 67.5B ± 1% 67.0B ± 0% -0.74% (p=0.033 n=10+10) Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16 4.55kB ± 1% 4.50kB ± 1% -1.14% (p=0.000 n=9+10) Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16 54.0B ± 0% 54.0B ± 0% ~ (all equal) Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16 4.47kB ± 0% 4.44kB ± 0% -0.66% (p=0.000 n=9+10) Decoder_DecoderSmall/alice29.txt.zst/buffered-16 74.6B ± 6% 75.3B ± 7% ~ (p=0.883 n=10+10) Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16 4.48kB ± 0% 4.45kB ± 1% -0.57% (p=0.005 n=7+9) Decoder_DecoderSmall/html_x_4.zst/buffered-16 54.0B ± 0% 52.4B ± 5% -2.96% (p=0.046 n=8+10) Decoder_DecoderSmall/html_x_4.zst/unbuffered-16 4.48kB ± 0% 4.46kB ± 0% -0.43% (p=0.016 n=10+9) Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16 4.46kB ± 0% 4.43kB ± 0% -0.69% (p=0.000 n=10+10) Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16 4.07kB ± 0% 4.04kB ± 0% -0.80% (p=0.000 n=7+9) Decoder_DecoderSmall/urls.10K.zst/buffered-16 71.4B ± 1% 71.6B ± 1% ~ (p=0.656 n=10+10) Decoder_DecoderSmall/urls.10K.zst/unbuffered-16 4.55kB ± 2% 4.53kB ± 2% ~ (p=0.529 n=10+10) Decoder_DecoderSmall/html.zst/buffered-16 49.0B ± 0% 49.0B ± 0% ~ (all equal) Decoder_DecoderSmall/html.zst/unbuffered-16 4.48kB ± 0% 4.44kB ± 0% -0.91% (p=0.000 n=10+9) Decoder_DecoderSmall/comp-data.bin.zst/buffered-16 48.0B ± 0% 48.0B ± 0% ~ (all equal) Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16 4.44kB ± 0% 4.44kB ± 0% ~ (p=0.897 n=10+10) Number of allocs is the same here. --- zstd/blockdec.go | 5 +++-- zstd/decoder.go | 29 +++++++++++++---------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/zstd/blockdec.go b/zstd/blockdec.go index da814715da..6b9929ddf5 100644 --- a/zstd/blockdec.go +++ b/zstd/blockdec.go @@ -82,8 +82,9 @@ type blockDec struct { err error - // Check against this crc - checkCRC []byte + // Check against this crc, if hasCRC is true. + checkCRC uint32 + hasCRC bool // Frame to use for singlethreaded decoding. // Should not be used by the decoder itself since parent may be another frame. diff --git a/zstd/decoder.go b/zstd/decoder.go index 832d7b7ae1..e976de9a0e 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -5,7 +5,6 @@ package zstd import ( - "bytes" "context" "encoding/binary" "io" @@ -471,18 +470,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { } } } - if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 { - got := d.current.crc.Sum64() - var tmp [4]byte - binary.LittleEndian.PutUint32(tmp[:], uint32(got)) - if !bytes.Equal(tmp[:], next.d.checkCRC) { + if next.err == nil && next.d != nil && next.d.hasCRC { + got := uint32(d.current.crc.Sum64()) + if got != next.d.checkCRC { if debugDecoder { - println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)") + printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC) } d.current.err = ErrCRCMismatch } else { if debugDecoder { - println("CRC ok", tmp[:]) + printf("CRC ok %08x\n", got) } } } @@ -922,18 +919,18 @@ decodeStream: println("next block returned error:", err) } dec.err = err - dec.checkCRC = nil + dec.hasCRC = false if dec.Last && frame.HasCheckSum && err == nil { crc, err := frame.rawInput.readSmall(4) - if err != nil { + if len(crc) < 4 { println("CRC missing?", err) dec.err = err - } - var tmp [4]byte - copy(tmp[:], crc) - dec.checkCRC = tmp[:] - if debugDecoder { - println("found crc to check:", dec.checkCRC) + } else { + dec.checkCRC = binary.LittleEndian.Uint32(crc) + dec.hasCRC = true + if debugDecoder { + printf("found crc to check: %08x\n", dec.checkCRC) + } } } err = dec.err