From 099f285620f05ad28a4588baf28aaf27011a7ed6 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 8 Sep 2022 18:19:40 +0200 Subject: [PATCH 1/2] Use arrays for constant size copies Requires Go 1.17 Replaces #424 --- flate/deflate.go | 3 ++- flate/fast_encoder.go | 3 ++- flate/huffman_bit_writer.go | 8 +++++--- huff0/decompress.go | 24 ++++++++++++++++-------- huff0/decompress_generic.go | 12 ++++++++---- zstd/enc_dfast.go | 7 +++++-- zstd/enc_fast.go | 6 ++++-- 7 files changed, 42 insertions(+), 21 deletions(-) diff --git a/flate/deflate.go b/flate/deflate.go index f8435998e5..f00da5b214 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -131,7 +131,8 @@ func (d *compressor) fillDeflate(b []byte) int { s := d.state if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { // shift the window by windowSize - copy(d.window[:], d.window[windowSize:2*windowSize]) + //copy(d.window[:], d.window[windowSize:2*windowSize]) + *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:]) s.index -= windowSize d.windowEnd -= windowSize if d.blockStart >= windowSize { diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index f781aaa625..cd77a2cc42 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -104,7 +104,8 @@ func (e *fastGen) addBlock(src []byte) int32 { } // Move down offset := int32(len(e.hist)) - maxMatchOffset - copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + // copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:]) e.cur += offset e.hist = e.hist[:maxMatchOffset] } diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index 25a9085853..89a5dd89f9 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -790,9 +790,11 @@ func (w *huffmanBitWriter) fillTokens() { // and offsetEncoding. // The number of literal and offset tokens is returned. func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { - copy(w.literalFreq[:], t.litHist[:]) - copy(w.literalFreq[256:], t.extraHist[:]) - copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + //copy(w.literalFreq[:], t.litHist[:]) + *(*[256]uint16)(w.literalFreq[:]) = t.litHist + //copy(w.literalFreq[256:], t.extraHist[:]) + *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist + w.offsetFreq = t.offHist if t.n == 0 { return diff --git a/huff0/decompress.go b/huff0/decompress.go index c0c48bd707..aa6a266bc5 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -763,10 +763,14 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[0][:]) - copy(out[dstEvery:], buf[1][:]) - copy(out[dstEvery*2:], buf[2][:]) - copy(out[dstEvery*3:], buf[3][:]) + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + //copy(out[dstEvery*3:], buf[3][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 // There must at least be 3 buffers left. @@ -997,10 +1001,14 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[0][:]) - copy(out[dstEvery:], buf[1][:]) - copy(out[dstEvery*2:], buf[2][:]) - copy(out[dstEvery*3:], buf[3][:]) + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + // copy(out[dstEvery*3:], buf[3][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 // There must at least be 3 buffers left. diff --git a/huff0/decompress_generic.go b/huff0/decompress_generic.go index 4f6f37cb2c..adef214764 100644 --- a/huff0/decompress_generic.go +++ b/huff0/decompress_generic.go @@ -122,10 +122,14 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[0][:]) - copy(out[dstEvery:], buf[1][:]) - copy(out[dstEvery*2:], buf[2][:]) - copy(out[dstEvery*3:], buf[3][:]) + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + //copy(out[dstEvery*3:], buf[3][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 // There must at least be 3 buffers left. diff --git a/zstd/enc_dfast.go b/zstd/enc_dfast.go index 7ff0c64fa3..1f4a9a2455 100644 --- a/zstd/enc_dfast.go +++ b/zstd/enc_dfast.go @@ -1103,7 +1103,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { } if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { - copy(e.longTable[:], e.dictLongTable) + //copy(e.longTable[:], e.dictLongTable) + e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable) for i := range e.longTableShardDirty { e.longTableShardDirty[i] = false } @@ -1114,7 +1115,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { continue } - copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:]) + e.longTableShardDirty[i] = false } } diff --git a/zstd/enc_fast.go b/zstd/enc_fast.go index f51ab529a0..181edc02b6 100644 --- a/zstd/enc_fast.go +++ b/zstd/enc_fast.go @@ -871,7 +871,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { const shardCnt = tableShardCnt const shardSize = tableShardSize if e.allDirty || dirtyShardCnt > shardCnt*4/6 { - copy(e.table[:], e.dictTable) + //copy(e.table[:], e.dictTable) + e.table = *(*[tableSize]tableEntry)(e.dictTable) for i := range e.tableShardDirty { e.tableShardDirty[i] = false } @@ -883,7 +884,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { continue } - copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:]) e.tableShardDirty[i] = false } e.allDirty = false From c4d0fc6053e4f723e105329a1d9b0948724eb5e3 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 8 Sep 2022 18:52:40 +0200 Subject: [PATCH 2/2] Check length before copy --- huff0/decompress.go | 22 +++++++++++----------- huff0/decompress_generic.go | 10 +++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/huff0/decompress.go b/huff0/decompress.go index aa6a266bc5..42a237eac4 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -763,21 +763,20 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } + // There must at least be 3 buffers left. + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 2") + } //copy(out, buf[0][:]) //copy(out[dstEvery:], buf[1][:]) //copy(out[dstEvery*2:], buf[2][:]) - //copy(out[dstEvery*3:], buf[3][:]) *(*[bufoff]byte)(out) = buf[0] *(*[bufoff]byte)(out[dstEvery:]) = buf[1] *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } } } if off > 0 { @@ -1001,6 +1000,12 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } + // There must at least be 3 buffers left. + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 2") + } + //copy(out, buf[0][:]) //copy(out[dstEvery:], buf[1][:]) //copy(out[dstEvery*2:], buf[2][:]) @@ -1011,11 +1016,6 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } } } if off > 0 { diff --git a/huff0/decompress_generic.go b/huff0/decompress_generic.go index adef214764..908c17de63 100644 --- a/huff0/decompress_generic.go +++ b/huff0/decompress_generic.go @@ -122,6 +122,11 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } + // There must at least be 3 buffers left. + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 2") + } //copy(out, buf[0][:]) //copy(out[dstEvery:], buf[1][:]) //copy(out[dstEvery*2:], buf[2][:]) @@ -132,11 +137,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] out = out[bufoff:] decoded += bufoff * 4 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } } } if off > 0 {