diff --git a/flate/_gen/gen_inflate.go b/flate/_gen/gen_inflate.go index f3d4aa7a9d..ff43f79dd4 100644 --- a/flate/_gen/gen_inflate.go +++ b/flate/_gen/gen_inflate.go @@ -1,7 +1,8 @@ //go:build generate // +build generate -//go:generate go run $GOFILE && gofmt -w ../inflate_gen.go +//go:generate go run $GOFILE +//go:generate go fmt ../inflate_gen.go package main @@ -16,9 +17,9 @@ func main() { panic(err) } defer f.Close() - types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"} - names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"} - imports := []string{"bytes", "bufio", "io", "strings", "math/bits"} + types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader", "Reader"} + names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader", "GenericReader"} + imports := []string{"bytes", "bufio", "fmt", "strings", "math/bits"} f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. package flate @@ -44,6 +45,11 @@ func (f *decompressor) $FUNCNAME$() { ) fr := f.r.($TYPE$) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + switch f.stepState { case stateInit: goto readLiteral @@ -62,41 +68,35 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } @@ -111,10 +111,12 @@ readLiteral: f.toRead = f.dict.readFlush() f.step = (*decompressor).$FUNCNAME$ f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data @@ -124,9 +126,10 @@ readLiteral: val := decCodeToLen[(v - 257)] length = int(val.length) + 3 n := uint(val.extra) - for f.nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } @@ -134,25 +137,27 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb®SizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } var dist uint32 if f.hd == nil { - for f.nb < 5 { + for fnb < 5 { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } @@ -160,12 +165,12 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb®SizeMaskUint32) + fnb += 8 } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both @@ -175,38 +180,35 @@ readLiteral: // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hd.chunks[b&(huffmanNumChunks-1)] + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask] + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n dist = uint32(chunk >> huffmanValueShift) break } @@ -220,9 +222,10 @@ readLiteral: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { + for fnb < nb { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -247,6 +252,7 @@ readLiteral: // No check on length; encoding can be prescient. if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) } @@ -271,10 +277,12 @@ copyHistory: f.toRead = f.dict.readFlush() f.step = (*decompressor).$FUNCNAME$ // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } ` @@ -290,6 +298,6 @@ copyHistory: f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n") } f.WriteString("\t\tdefault:\n") - f.WriteString("\t\t\treturn f.huffmanBlockGeneric") + f.WriteString("\t\t\treturn f.huffmanGenericReader\n") f.WriteString("\t}\n}\n") } diff --git a/flate/inflate.go b/flate/inflate.go index d5f62f6a2c..414c0bea9f 100644 --- a/flate/inflate.go +++ b/flate/inflate.go @@ -36,6 +36,13 @@ type lengthExtra struct { var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + // Initialize the fixedHuffmanDecoder only once upon first use. var fixedOnce sync.Once var fixedHuffmanDecoder huffmanDecoder @@ -559,221 +566,6 @@ func (f *decompressor) readHuffman() error { return nil } -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlockGeneric() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - f.err = noEOF(err) - return - } - f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 - } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var n uint // number of bits extra - var length int - var err error - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlockGeneric - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n - } - - var dist uint32 - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - sym, err := f.huffSym(f.hd) - if err != nil { - if debugDecode { - fmt.Println("huffsym:", err) - } - f.err = err - return - } - dist = uint32(sym) - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { - if err = f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - default: - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { - if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - // Copy a single uncompressed data block from input to output. func (f *decompressor) dataBlock() { // Uncompressed. diff --git a/flate/inflate_gen.go b/flate/inflate_gen.go index cc6db27925..8d632cea0f 100644 --- a/flate/inflate_gen.go +++ b/flate/inflate_gen.go @@ -21,6 +21,11 @@ func (f *decompressor) huffmanBytesBuffer() { ) fr := f.r.(*bytes.Buffer) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + switch f.stepState { case stateInit: goto readLiteral @@ -39,41 +44,35 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } @@ -88,10 +87,12 @@ readLiteral: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data @@ -101,9 +102,10 @@ readLiteral: val := decCodeToLen[(v - 257)] length = int(val.length) + 3 n := uint(val.extra) - for f.nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } @@ -111,25 +113,27 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } var dist uint32 if f.hd == nil { - for f.nb < 5 { + for fnb < 5 { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } @@ -137,12 +141,12 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both @@ -152,38 +156,35 @@ readLiteral: // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hd.chunks[b&(huffmanNumChunks-1)] + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask] + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n dist = uint32(chunk >> huffmanValueShift) break } @@ -197,9 +198,10 @@ readLiteral: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { + for fnb < nb { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -224,6 +228,7 @@ readLiteral: // No check on length; encoding can be prescient. if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) } @@ -248,10 +253,12 @@ copyHistory: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -265,6 +272,11 @@ func (f *decompressor) huffmanBytesReader() { ) fr := f.r.(*bytes.Reader) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + switch f.stepState { case stateInit: goto readLiteral @@ -283,41 +295,35 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } @@ -332,10 +338,12 @@ readLiteral: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBytesReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data @@ -345,9 +353,10 @@ readLiteral: val := decCodeToLen[(v - 257)] length = int(val.length) + 3 n := uint(val.extra) - for f.nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } @@ -355,25 +364,27 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } var dist uint32 if f.hd == nil { - for f.nb < 5 { + for fnb < 5 { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } @@ -381,12 +392,12 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both @@ -396,38 +407,35 @@ readLiteral: // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hd.chunks[b&(huffmanNumChunks-1)] + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask] + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n dist = uint32(chunk >> huffmanValueShift) break } @@ -441,9 +449,10 @@ readLiteral: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { + for fnb < nb { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -468,6 +479,7 @@ readLiteral: // No check on length; encoding can be prescient. if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) } @@ -492,10 +504,12 @@ copyHistory: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBytesReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -509,6 +523,11 @@ func (f *decompressor) huffmanBufioReader() { ) fr := f.r.(*bufio.Reader) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + switch f.stepState { case stateInit: goto readLiteral @@ -527,41 +546,35 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } @@ -576,10 +589,12 @@ readLiteral: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBufioReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data @@ -589,9 +604,10 @@ readLiteral: val := decCodeToLen[(v - 257)] length = int(val.length) + 3 n := uint(val.extra) - for f.nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } @@ -599,25 +615,27 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } var dist uint32 if f.hd == nil { - for f.nb < 5 { + for fnb < 5 { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } @@ -625,12 +643,12 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both @@ -640,38 +658,35 @@ readLiteral: // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hd.chunks[b&(huffmanNumChunks-1)] + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask] + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n dist = uint32(chunk >> huffmanValueShift) break } @@ -685,9 +700,10 @@ readLiteral: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { + for fnb < nb { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -712,6 +730,7 @@ readLiteral: // No check on length; encoding can be prescient. if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) } @@ -736,10 +755,12 @@ copyHistory: f.toRead = f.dict.readFlush() f.step = (*decompressor).huffmanBufioReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } // Decode a single Huffman block from f. @@ -753,6 +774,11 @@ func (f *decompressor) huffmanStringsReader() { ) fr := f.r.(*strings.Reader) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + switch f.stepState { case stateInit: goto readLiteral @@ -771,41 +797,286 @@ readLiteral: // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanGenericReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb := f.nb, f.b + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n v = int(chunk >> huffmanValueShift) break } @@ -818,12 +1089,14 @@ readLiteral: f.dict.writeByte(byte(v)) if f.dict.availWrite() == 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanStringsReader + f.step = (*decompressor).huffmanGenericReader f.stepState = stateInit + f.b, f.nb = fb, fnb return } goto readLiteral case v == 256: + f.b, f.nb = fb, fnb f.finishBlock() return // otherwise, reference to older data @@ -833,9 +1106,10 @@ readLiteral: val := decCodeToLen[(v - 257)] length = int(val.length) + 3 n := uint(val.extra) - for f.nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits n>0:", err) } @@ -843,25 +1117,27 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - length += int(f.b & uint32(1<<(n®SizeMaskUint32)-1)) - f.b >>= n & regSizeMaskUint32 - f.nb -= n + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n default: if debugDecode { fmt.Println(v, ">= maxNumLit") } f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb return } var dist uint32 if f.hd == nil { - for f.nb < 5 { + for fnb < 5 { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb<5:", err) } @@ -869,12 +1145,12 @@ readLiteral: return } f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 } else { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both @@ -884,38 +1160,35 @@ readLiteral: // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b for { - for nb < n { + for fnb < n { c, err := fr.ReadByte() if err != nil { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb f.err = noEOF(err) return } f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 } - chunk := f.hd.chunks[b&(huffmanNumChunks-1)] + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask] + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] n = uint(chunk & huffmanCountMask) } - if n <= nb { + if n <= fnb { if n == 0 { - f.b = b - f.nb = nb + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("huffsym: n==0") } f.err = CorruptInputError(f.roffset) return } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n dist = uint32(chunk >> huffmanValueShift) break } @@ -929,9 +1202,10 @@ readLiteral: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << (nb & regSizeMaskUint32) - for f.nb < nb { + for fnb < nb { c, err := fr.ReadByte() if err != nil { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - f.nb -= nb + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra default: + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist too big:", dist, maxNumDist) } @@ -956,6 +1232,7 @@ readLiteral: // No check on length; encoding can be prescient. if dist > uint32(f.dict.histSize()) { + f.b, f.nb = fb, fnb if debugDecode { fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) } @@ -978,12 +1255,14 @@ copyHistory: if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.step = (*decompressor).huffmanGenericReader // We need to continue this work f.stepState = stateDict + f.b, f.nb = fb, fnb return } goto readLiteral } + // Not reached } func (f *decompressor) huffmanBlockDecoder() func() { @@ -996,7 +1275,9 @@ func (f *decompressor) huffmanBlockDecoder() func() { return f.huffmanBufioReader case *strings.Reader: return f.huffmanStringsReader + case Reader: + return f.huffmanGenericReader default: - return f.huffmanBlockGeneric + return f.huffmanGenericReader } }