Skip to content

Commit

Permalink
zstd: Speed up when WithDecoderLowmem(false) (#599)
Browse files Browse the repository at this point in the history
Typically an improvement when limited by the execute stage (enwik9)

```
BenchmarkDecoderEnwik9/multithreaded-writer-32         	       1	1184100400 ns/op	 844.52 MB/s	13992400 B/op	     101 allocs/op
BenchmarkDecoderEnwik9/multithreaded-writer-himem-32   	       1	1095716200 ns/op	 912.65 MB/s	19551184 B/op	      63 allocs/op
BenchmarkDecoderEnwik9/singlethreaded-writer-32        	       1	1612467600 ns/op	 620.17 MB/s	 6602968 B/op	      29 allocs/op
BenchmarkDecoderEnwik9/singlethreaded-writerto-32      	       1	1631256700 ns/op	 613.02 MB/s	 6590776 B/op	      26 allocs/op
BenchmarkDecoderEnwik9/singlethreaded-himem-32         	       1	1338224000 ns/op	 747.26 MB/s	 8835640 B/op	      23 allocs/op

BenchmarkDecoderSilesia/multithreaded-writer-32         	       5	 201332380 ns/op	1052.72 MB/s	 5130272 B/op	      74 allocs/op
BenchmarkDecoderSilesia/multithreaded-writer-himem-32   	       5	 201792940 ns/op	1050.32 MB/s	 3105158 B/op	      42 allocs/op
BenchmarkDecoderSilesia/singlethreaded-writer-32        	       4	 268667300 ns/op	 788.88 MB/s	 1648902 B/op	      19 allocs/op
BenchmarkDecoderSilesia/singlethreaded-writerto-32      	       4	 265867950 ns/op	 797.19 MB/s	 1642810 B/op	      15 allocs/op
BenchmarkDecoderSilesia/singlethreaded-himem-32         	       4	 264649325 ns/op	 800.86 MB/s	 1161658 B/op	       7 allocs/op
```
  • Loading branch information
klauspost committed May 24, 2022
1 parent 7484543 commit 131651f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 5 deletions.
47 changes: 44 additions & 3 deletions zstd/decoder_test.go
Expand Up @@ -1415,7 +1415,27 @@ func benchmarkDecoderWithFile(path string, b *testing.B) {
}

b.Run("multithreaded-writer", func(b *testing.B) {
dec, err := NewReader(nil)
dec, err := NewReader(nil, WithDecoderLowmem(true))
if err != nil {
b.Fatal(err)
}
b.SetBytes(n)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err = dec.Reset(bytes.NewBuffer(data))
if err != nil {
b.Fatal(err)
}
_, err := io.CopyN(ioutil.Discard, dec, n)
if err != nil {
b.Fatal(err)
}
}
})

b.Run("multithreaded-writer-himem", func(b *testing.B) {
dec, err := NewReader(nil, WithDecoderLowmem(false))
if err != nil {
b.Fatal(err)
}
Expand All @@ -1436,7 +1456,7 @@ func benchmarkDecoderWithFile(path string, b *testing.B) {
})

b.Run("singlethreaded-writer", func(b *testing.B) {
dec, err := NewReader(nil, WithDecoderConcurrency(1))
dec, err := NewReader(nil, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
b.Fatal(err)
}
Expand All @@ -1457,7 +1477,28 @@ func benchmarkDecoderWithFile(path string, b *testing.B) {
})

b.Run("singlethreaded-writerto", func(b *testing.B) {
dec, err := NewReader(nil, WithDecoderConcurrency(1))
dec, err := NewReader(nil, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
b.Fatal(err)
}

b.SetBytes(n)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err = dec.Reset(bytes.NewBuffer(data))
if err != nil {
b.Fatal(err)
}
// io.Copy will use io.WriterTo
_, err := io.Copy(ioutil.Discard, dec)
if err != nil {
b.Fatal(err)
}
}
})
b.Run("singlethreaded-himem", func(b *testing.B) {
dec, err := NewReader(nil, WithDecoderConcurrency(1), WithDecoderLowmem(false))
if err != nil {
b.Fatal(err)
}
Expand Down
5 changes: 3 additions & 2 deletions zstd/framedec.go
Expand Up @@ -253,10 +253,11 @@ func (d *frameDec) reset(br byteBuffer) error {
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
if d.o.lowMem && d.history.windowSize < maxBlockSize {
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
// Alloc 2x window size if not low-mem, or very small window size.
d.history.allocFrameBuffer = d.history.windowSize * 2
// TODO: Maybe use FrameContent size
} else {
// Alloc with one additional block
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}

Expand Down

0 comments on commit 131651f

Please sign in to comment.