From d4b89e7b673188999959e2ca5eda3cc63c1e1f2e Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 12 Sep 2022 20:04:35 +0200 Subject: [PATCH] Use faster gzip package (#982) * Use faster gzip package Before: ``` BenchmarkCompression/gzip/compress-32 81 13573535 ns/op 8.77 MB/s 10115 B/op 1 allocs/op BenchmarkCompression/gzip/decompress-32 580 2023225 ns/op 959.10 MB/s 7563 B/op 53 allocs/op gzip: 93.86% ``` After: ``` BenchmarkCompression/gzip/compress-32 224 5340019 ns/op 24.42 MB/s 4838 B/op 1 allocs/op BenchmarkCompression/gzip/decompress-32 692 1713771 ns/op 1132.28 MB/s 566 B/op 34 allocs/op gzip: 93.28% ``` Pretty typical scenario for default settings, 2-3x faster, sometimes at a slight compression loss. This provides a much better "default" trade-off than the stdlib. For people regretting the 0.6% loss, the higher compression levels can be used. Decompression typically has an even bigger margin, but it depends on the input. * Use gz as import alias. --- compress/compress_test.go | 9 ++++----- compress/gzip/gzip.go | 3 ++- go.mod | 2 +- go.sum | 4 ++-- gzip/gzip.go | 3 +-- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/compress/compress_test.go b/compress/compress_test.go index e4d2426d..1da84122 100644 --- a/compress/compress_test.go +++ b/compress/compress_test.go @@ -2,7 +2,6 @@ package compress_test import ( "bytes" - stdgzip "compress/gzip" "context" "fmt" "io" @@ -16,6 +15,7 @@ import ( "text/tabwriter" "time" + gz "github.com/klauspost/compress/gzip" "github.com/segmentio/kafka-go" pkg "github.com/segmentio/kafka-go/compress" "github.com/segmentio/kafka-go/compress/gzip" @@ -345,7 +345,7 @@ func BenchmarkCompression(b *testing.B) { } defer f.Close() - z, err := stdgzip.NewReader(f) + z, err := gz.NewReader(f) if err != nil { b.Fatal(err) } @@ -366,8 +366,6 @@ func BenchmarkCompression(b *testing.B) { fmt.Println(ts) }() - b.ResetTimer() - for i := range benchmarks { benchmark := &benchmarks[i] ratio := 0.0 @@ -389,6 +387,7 @@ func 
benchmarkCompression(b *testing.B, codec pkg.Codec, buf *bytes.Buffer, payl b.Run("compress", func(b *testing.B) { compressed = true r := bytes.NewReader(payload) + b.ReportAllocs() for i := 0; i < b.N; i++ { buf.Reset() @@ -422,7 +421,7 @@ func benchmarkCompression(b *testing.B, codec pkg.Codec, buf *bytes.Buffer, payl b.Run("decompress", func(b *testing.B) { c := bytes.NewReader(buf.Bytes()) - + b.ReportAllocs() for i := 0; i < b.N; i++ { c.Reset(buf.Bytes()) r := codec.NewReader(c) diff --git a/compress/gzip/gzip.go b/compress/gzip/gzip.go index 64da3129..ad5009c3 100644 --- a/compress/gzip/gzip.go +++ b/compress/gzip/gzip.go @@ -1,9 +1,10 @@ package gzip import ( - "compress/gzip" "io" "sync" + + "github.com/klauspost/compress/gzip" ) var ( diff --git a/go.mod b/go.mod index e3cd2c03..396d1a94 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/segmentio/kafka-go go 1.15 require ( - github.com/klauspost/compress v1.15.7 + github.com/klauspost/compress v1.15.9 github.com/pierrec/lz4/v4 v4.1.15 github.com/stretchr/testify v1.8.0 github.com/xdg/scram v1.0.5 diff --git a/go.sum b/go.sum index 3adcc897..b3a030b6 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/klauspost/compress v1.15.7 h1:7cgTQxJCU/vy+oP/E3B9RGbQTgbiVzIJWIKOLoAsPok= -github.com/klauspost/compress v1.15.7/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/gzip/gzip.go b/gzip/gzip.go index 2ad84b50..230e4539 100644 --- a/gzip/gzip.go +++ b/gzip/gzip.go @@ -4,8 +4,7 @@ package gzip import ( - gz "compress/gzip" - + gz "github.com/klauspost/compress/gzip" "github.com/segmentio/kafka-go/compress/gzip" )