From f086b027f144af907690a0db9f0335c750301fa2 Mon Sep 17 00:00:00 2001 From: Ryan M Date: Sun, 25 Sep 2022 14:45:53 +0200 Subject: [PATCH 1/2] [zstd/enc] Cache align struct for big perf boost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` benchstat -delta-test none old2.txt new2.txt name old time/op new time/op delta Encoder_EncodeAllXML-16 13.3ms ± 0% 13.0ms ± 0% -2.11% Encoder_EncodeAllSimple/fastest-16 229µs ± 0% 227µs ± 0% -1.08% Encoder_EncodeAllSimple/default-16 343µs ± 0% 371µs ± 0% +8.13% Encoder_EncodeAllSimple/better-16 402µs ± 0% 393µs ± 0% -2.33% Encoder_EncodeAllSimple/best-16 6.41ms ± 0% 2.72ms ± 0% -57.48% <==== Encoder_EncodeAllSimple4K/fastest-16 2.70µs ± 0% 2.56µs ± 0% -5.26% Encoder_EncodeAllSimple4K/default-16 33.1µs ± 0% 33.5µs ± 0% +1.30% Encoder_EncodeAllSimple4K/better-16 39.3µs ± 0% 38.8µs ± 0% -1.12% Encoder_EncodeAllSimple4K/best-16 732µs ± 0% 360µs ± 0% -50.90% <==== Encoder_EncodeAllHTML-16 213µs ± 0% 209µs ± 0% -2.07% Encoder_EncodeAllTwain-16 3.23ms ± 0% 3.23ms ± 0% -0.04% Encoder_EncodeAllPi-16 1.12ms ± 0% 1.11ms ± 0% -1.01% Random4KEncodeAllFastest-16 988ns ± 0% 976ns ± 0% -1.31% Random10MBEncodeAllFastest-16 2.50ms ± 0% 2.48ms ± 0% -0.70% Random4KEncodeAllDefault-16 4.58µs ± 0% 4.56µs ± 0% -0.31% RandomEncodeAllDefault-16 2.58ms ± 0% 2.52ms ± 0% -2.20% Random10MBEncoderFastest-16 3.61ms ± 0% 3.61ms ± 0% -0.04% RandomEncoderDefault-16 3.44ms ± 0% 3.44ms ± 0% +0.03% name old speed new speed delta Encoder_EncodeAllXML-16 402MB/s ± 0% 410MB/s ± 0% +2.16% Encoder_EncodeAllSimple/fastest-16 173MB/s ± 0% 175MB/s ± 0% +1.10% Encoder_EncodeAllSimple/default-16 116MB/s ± 0% 107MB/s ± 0% -7.52% Encoder_EncodeAllSimple/better-16 99.0MB/s ± 0% 101.4MB/s ± 0% +2.38% Encoder_EncodeAllSimple/best-16 6.21MB/s ± 0% 14.61MB/s ± 0% +135.27% <==== Encoder_EncodeAllSimple4K/fastest-16 1.52GB/s ± 0% 1.60GB/s ± 0% +5.56% Encoder_EncodeAllSimple4K/default-16 124MB/s ± 0% 122MB/s ± 0% -1.29% Encoder_EncodeAllSimple4K/better-16 104MB/s ± 0% 106MB/s ± 0% +1.13% Encoder_EncodeAllSimple4K/best-16 5.59MB/s ± 0% 11.39MB/s ± 0% +103.76% <==== Encoder_EncodeAllHTML-16 208MB/s ± 0% 213MB/s ± 0% +2.11% Encoder_EncodeAllTwain-16 120MB/s ± 0% 120MB/s ± 0% +0.04% Encoder_EncodeAllPi-16 89.0MB/s ± 0% 89.9MB/s ± 0% +1.02% Random4KEncodeAllFastest-16 4.14GB/s ± 0% 4.20GB/s ± 0% +1.32% Random10MBEncodeAllFastest-16 4.19GB/s ± 0% 4.22GB/s ± 0% +0.71% Random4KEncodeAllDefault-16 895MB/s ± 0% 897MB/s ± 0% +0.31% RandomEncodeAllDefault-16 4.06GB/s ± 0% 4.15GB/s ± 0% +2.25% Random10MBEncoderFastest-16 2.90GB/s ± 0% 2.90GB/s ± 0% +0.04% RandomEncoderDefault-16 3.05GB/s ± 0% 3.05GB/s ± 0% -0.03% ``` --- zstd/enc_best.go | 1 + 1 file changed, 1 insertion(+) diff --git a/zstd/enc_best.go b/zstd/enc_best.go index 96028ecd83..53ffe423ac 100644 --- a/zstd/enc_best.go +++ b/zstd/enc_best.go @@ -32,6 +32,7 @@ type match struct { length int32 rep int32 est int32 + _ [12]byte } const highScore = 25000 From d608a6a8b9a09599f456fc81631dbde9cf632745 Mon Sep 17 00:00:00 2001 From: Ryan M Date: Sun, 25 Sep 2022 18:02:46 +0200 Subject: [PATCH 2/2] Add comment --- zstd/enc_best.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zstd/enc_best.go b/zstd/enc_best.go index 53ffe423ac..dbbb88d92b 100644 --- a/zstd/enc_best.go +++ b/zstd/enc_best.go @@ -32,7 +32,7 @@ type match struct { length int32 rep int32 est int32 - _ [12]byte + _ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes } const highScore = 25000