diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index fda6a55ebd..2f10f64a3e 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -183,6 +183,11 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute ec.moPtr = moP ec.mlPtr = mlP ec.llPtr = llP + zero := GP64() + XORQ(zero, zero) + MOVQ(zero, moP) + MOVQ(zero, mlP) + MOVQ(zero, llP) ec.outBase = GP64() ec.outEndPtr = AllocLocal(8) @@ -338,11 +343,14 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute Comment("Adjust offset") var offset reg.GPVirtual + end := LabelRef(name + "_after_adjust") if o.useSeqs { - offset = o.adjustOffset(name+"_adjust", moP, llP, R14, &offsets) + offset = o.adjustOffset(name+"_adjust", moP, llP, R14, &offsets, end) } else { - offset = o.adjustOffsetInMemory(name+"_adjust", moP, llP, R14) + offset = o.adjustOffsetInMemory(name+"_adjust", moP, llP, R14, end) } + Label(name + "_after_adjust") + MOVQ(offset, moP) // Store offset Comment("Check values") @@ -586,26 +594,25 @@ func (o options) updateLength(name string, brValue, brBitsRead, state reg.GPVirt MOVQ(state, AX.As64()) // So we can grab high bytes. MOVQ(brBitsRead, CX.As64()) MOVQ(brValue, BX) - SHLQ(CX, BX) // BX = br.value << br.bitsRead (part of getBits) - MOVB(AX.As8H(), CX.As8L()) // CX = moB (ofState.addBits(), that is byte #1 of moState) - ADDQ(CX.As64(), brBitsRead) // br.bitsRead += n (part of getBits) - NEGL(CX.As32()) // CX = 64 - n - SHRQ(CX, BX) // BX = (br.value << br.bitsRead) >> (64 - n) -- getBits() result - SHRQ(U8(32), AX) // AX = mo (ofState.baselineInt(), that's the higher dword of moState) + SHLQ(CX, BX) // BX = br.value << br.bitsRead (part of getBits) + MOVB(AX.As8H(), CX.As8L()) // CX = moB (ofState.addBits(), that is byte #1 of moState) + SHRQ(U8(32), AX) // AX = mo (ofState.baselineInt(), that's the higher dword of moState) + // If addBits == 0, skip TESTQ(CX.As64(), CX.As64()) - CMOVQEQ(CX.As64(), BX) // BX is zero if n is zero + JZ(LabelRef(name + "_zero")) - // Check if AX is reasonable - assert(func(ok LabelRef) { - CMPQ(AX, U32(1<<28)) - JB(ok) - }) - // Check if BX is reasonable - assert(func(ok LabelRef) { - CMPQ(BX, U32(1<<28)) - JB(ok) - }) - ADDQ(BX, AX) // AX - mo + br.getBits(moB) + ADDQ(CX.As64(), brBitsRead) // br.bitsRead += n (part of getBits) + // If overread, skip + CMPQ(brBitsRead, U8(64)) + JA(LabelRef(name + "_zero")) + CMPQ(CX.As64(), U8(64)) + JAE(LabelRef(name + "_zero")) + + NEGQ(CX.As64()) // CX = 64 - n + SHRQ(CX, BX) // BX = (br.value << br.bitsRead) >> (64 - n) -- getBits() result + ADDQ(BX, AX) // AX - mo + br.getBits(moB) + + Label(name + "_zero") MOVQ(AX, out) // Store result } } @@ -717,7 +724,7 @@ func (o options) getBits(nBits, brValue, brBitsRead reg.GPVirtual) reg.GPVirtual return BX } -func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, offsets *[3]reg.GPVirtual) (offset reg.GPVirtual) { +func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, offsets *[3]reg.GPVirtual, end LabelRef) (offset reg.GPVirtual) { offset = GP64() MOVQ(moP, offset) { @@ -733,7 +740,7 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, MOVQ(offsets[1], offsets[2]) // s.prevOffset[2] = s.prevOffset[1] MOVQ(offsets[0], offsets[1]) // s.prevOffset[1] = s.prevOffset[0] MOVQ(offset, offsets[0]) // s.prevOffset[0] = offset - JMP(LabelRef(name + "_end")) + JMP(end) } Label(name + "_offsetB_1_or_0") @@ -762,7 +769,7 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, TESTQ(offset, offset) JNZ(LabelRef(name + "_offset_nonzero")) MOVQ(offsets[0], offset) - JMP(LabelRef(name + "_end")) + JMP(end) } } Label(name + "_offset_nonzero") @@ -821,13 +828,13 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, MOVQ(temp, offsets[0]) MOVQ(temp, offset) // return temp } - Label(name + "_end") + JMP(end) return offset } // adjustOffsetInMemory is an adjustOffset version that does not cache prevOffset values in registers. // It fetches and stores values directly into the fields of `sequenceDecs` structure. -func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPVirtual) (offset reg.GPVirtual) { +func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPVirtual, end LabelRef) (offset reg.GPVirtual) { s := Dereference(Param("s")) po0, _ := s.Field("prevOffset").Index(0).Resolve() @@ -849,26 +856,19 @@ func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPV MOVUPS(po0.Addr, tmp) // tmp = (s.prevOffset[0], s.prevOffset[1]) MOVQ(offset, po0.Addr) // s.prevOffset[0] = offset MOVUPS(tmp, po1.Addr) // s.prevOffset[1], s.prevOffset[2] = s.prevOffset[0], s.prevOffset[1] - JMP(LabelRef(name + "_end")) + JMP(end) } Label(name + "_offsetB_1_or_0") // if litLen == 0 { // offset++ // } + { - if true { - CMPQ(llP, U32(0)) - JNE(LabelRef(name + "_offset_maybezero")) - INCQ(offset) - JMP(LabelRef(name + "_offset_nonzero")) - } else { - // No idea why this doesn't work: - tmp := GP64() - LEAQ(Mem{Base: offset, Disp: 1}, tmp) - CMPQ(llP, U32(0)) - CMOVQEQ(tmp, offset) - } + CMPQ(llP, U32(0)) + JNE(LabelRef(name + "_offset_maybezero")) + INCQ(offset) + JMP(LabelRef(name + "_offset_nonzero")) // if offset == 0 { // return s.prevOffset[0] @@ -878,11 +878,27 @@ func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPV TESTQ(offset, offset) JNZ(LabelRef(name + "_offset_nonzero")) MOVQ(po0.Addr, offset) - JMP(LabelRef(name + "_end")) + JMP(end) } } Label(name + "_offset_nonzero") { + // Offset must be 1 -> 3 + assert(func(ok LabelRef) { + // Test is above or equal (shouldn't be equal) + CMPQ(offset, U32(0)) + JAE(ok) + }) + assert(func(ok LabelRef) { + // Check if Above 0. + CMPQ(offset, U32(0)) + JA(ok) + }) + assert(func(ok LabelRef) { + // Check if Below or Equal to 3. + CMPQ(offset, U32(3)) + JBE(ok) + }) // if offset == 3 { // temp = s.prevOffset[0] - 1 // } else { @@ -906,9 +922,23 @@ func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPV CMPQ(offset, U8(3)) CMOVQEQ(DX, CX) CMOVQEQ(R15, DX) - prevOffset := GP64() - LEAQ(po0.Addr, prevOffset) // &prevOffset[0] - ADDQ(Mem{Base: prevOffset, Index: CX, Scale: 8}, DX) + assert(func(ok LabelRef) { + CMPQ(CX, U32(0)) + JAE(ok) + }) + assert(func(ok LabelRef) { + CMPQ(CX, U32(3)) + JB(ok) + }) + if po0.Addr.Index != nil { + // Use temporary (not currently needed) + prevOffset := GP64() + LEAQ(po0.Addr, prevOffset) // &prevOffset[0] + ADDQ(Mem{Base: prevOffset, Index: CX, Scale: 8}, DX) + } else { + ADDQ(Mem{Base: po0.Addr.Base, Disp: po0.Addr.Disp, Index: CX, Scale: 8}, DX) + } + temp := DX // if temp == 0 { // temp = 1 @@ -935,7 +965,7 @@ func (o options) adjustOffsetInMemory(name string, moP, llP Mem, offsetB reg.GPV MOVQ(temp, po0.Addr) // s.prevOffset[0] = temp MOVQ(temp, offset) // return temp } - Label(name + "_end") + JMP(end) return offset } diff --git a/zstd/bytebuf.go b/zstd/bytebuf.go index 4493baa756..2ad02070d7 100644 --- a/zstd/bytebuf.go +++ b/zstd/bytebuf.go @@ -23,7 +23,7 @@ type byteBuffer interface { readByte() (byte, error) // Skip n bytes. - skipN(n int) error + skipN(n int64) error } // in-memory buffer @@ -62,9 +62,12 @@ func (b *byteBuf) readByte() (byte, error) { return r, nil } -func (b *byteBuf) skipN(n int) error { +func (b *byteBuf) skipN(n int64) error { bb := *b - if len(bb) < n { + if n < 0 { + return fmt.Errorf("negative skip (%d) requested", n) + } + if int64(len(bb)) < n { return io.ErrUnexpectedEOF } *b = bb[n:] @@ -120,9 +123,9 @@ func (r *readerWrapper) readByte() (byte, error) { return r.tmp[0], nil } -func (r *readerWrapper) skipN(n int) error { - n2, err := io.CopyN(ioutil.Discard, r.r, int64(n)) - if n2 != int64(n) { +func (r *readerWrapper) skipN(n int64) error { + n2, err := io.CopyN(ioutil.Discard, r.r, n) + if n2 != n { err = io.ErrUnexpectedEOF } return err diff --git a/zstd/dict_test.go b/zstd/dict_test.go index 013707c5fb..5c587a7ac7 100644 --- a/zstd/dict_test.go +++ b/zstd/dict_test.go @@ -13,24 +13,7 @@ import ( func TestDecoder_SmallDict(t *testing.T) { // All files have CRC zr := testCreateZipReader("testdata/dict-tests-small.zip", t) - var dicts [][]byte - for _, tt := range zr.File { - if !strings.HasSuffix(tt.Name, ".dict") { - continue - } - func() { - r, err := tt.Open() - if err != nil { - t.Fatal(err) - } - defer r.Close() - in, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - dicts = append(dicts, in) - }() - } + dicts := readDicts(t, zr) dec, err := NewReader(nil, WithDecoderConcurrency(1), WithDecoderDicts(dicts...)) if err != nil { t.Fatal(err) @@ -453,3 +436,25 @@ func TestDecoder_MoreDicts2(t *testing.T) { }) } } + +func readDicts(tb testing.TB, zr *zip.Reader) [][]byte { + var dicts [][]byte + for _, tt := range zr.File { + if !strings.HasSuffix(tt.Name, ".dict") { + continue + } + func() { + r, err := tt.Open() + if err != nil { + tb.Fatal(err) + } + defer r.Close() + in, err := ioutil.ReadAll(r) + if err != nil { + tb.Fatal(err) + } + dicts = append(dicts, in) + }() + } + return dicts +} diff --git a/zstd/framedec.go b/zstd/framedec.go index 9311ef51f5..9568a4ba31 100644 --- a/zstd/framedec.go +++ b/zstd/framedec.go @@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error { } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err = br.skipN(int(n)) + err = br.skipN(int64(n)) if err != nil { if debugDecoder { println("Reading discarded frame", err) diff --git a/zstd/fse_decoder_amd64.go b/zstd/fse_decoder_amd64.go index e74df436cf..c881d28d88 100644 --- a/zstd/fse_decoder_amd64.go +++ b/zstd/fse_decoder_amd64.go @@ -34,8 +34,8 @@ const ( // buildDtable will build the decoding table. func (s *fseDecoder) buildDtable() error { ctx := buildDtableAsmContext{ - stateTable: (*uint16)(&s.stateTable[0]), - norm: (*int16)(&s.norm[0]), + stateTable: &s.stateTable[0], + norm: &s.norm[0], dt: (*uint64)(&s.dt[0]), } code := buildDtable_asm(s, &ctx) diff --git a/zstd/fuzz_test.go b/zstd/fuzz_test.go new file mode 100644 index 0000000000..1080dc28b7 --- /dev/null +++ b/zstd/fuzz_test.go @@ -0,0 +1,373 @@ +//go:build go1.18 +// +build go1.18 + +package zstd + +import ( + "bytes" + "fmt" + "go/ast" + "go/parser" + "go/token" + "io" + "io/ioutil" + "os" + rdebug "runtime/debug" + "strconv" + "testing" + + "github.com/klauspost/compress/internal/cpuinfo" + "github.com/klauspost/compress/zip" +) + +func FuzzDecodeAll(f *testing.F) { + addBytesFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", true) + addBytesFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", false) + decLow, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decLow.Close() + decHi, err := NewReader(nil, WithDecoderLowmem(false), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decHi.Close() + + f.Fuzz(func(t *testing.T, b []byte) { + // Just test if we crash... + defer func() { + if r := recover(); r != nil { + rdebug.PrintStack() + t.Fatal(r) + } + }() + b1, err1 := decLow.DecodeAll(b, nil) + b2, err2 := decHi.DecodeAll(b, nil) + if err1 != err2 { + t.Log(err1, err2) + } + if err1 != nil { + b1, b2 = b1[:0], b2[:0] + } + if !bytes.Equal(b1, b2) { + t.Fatalf("Output mismatch, low: %v, hi: %v", err1, err2) + } + }) +} + +func FuzzDecodeAllNoBMI2(f *testing.F) { + if !cpuinfo.HasBMI2() { + f.Skip("No BMI, so already tested") + return + } + defer cpuinfo.DisableBMI2()() + FuzzDecodeAll(f) +} + +func FuzzDecoder(f *testing.F) { + addBytesFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", true) + addBytesFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", false) + decLow, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decLow.Close() + // Test with high memory, but sync decoding + decHi, err := NewReader(nil, WithDecoderLowmem(false), WithDecoderConcurrency(1), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decHi.Close() + + f.Fuzz(func(t *testing.T, b []byte) { + // Just test if we crash... + defer func() { + if r := recover(); r != nil { + rdebug.PrintStack() + t.Fatal(r) + } + }() + err := decLow.Reset(io.NopCloser(bytes.NewReader(b))) + if err != nil { + t.Fatal(err) + } + err = decHi.Reset(io.NopCloser(bytes.NewReader(b))) + if err != nil { + t.Fatal(err) + } + b1, err1 := ioutil.ReadAll(decLow) + b2, err2 := ioutil.ReadAll(decHi) + if err1 != err2 { + t.Log(err1, err2) + } + if err1 != nil { + b1, b2 = b1[:0], b2[:0] + } + if !bytes.Equal(b1, b2) { + t.Fatalf("Output mismatch, low: %v, hi: %v", err1, err2) + } + }) +} + +func FuzzEncoding(f *testing.F) { + addBytesFromZip(f, "testdata/fuzz/encode-corpus-raw.zip", true) + addBytesFromZip(f, "testdata/comp-crashers.zip", true) + addBytesFromZip(f, "testdata/fuzz/encode-corpus-encoded.zip", false) + // Fuzzing tweaks: + const ( + // Test a subset of encoders. + startFuzz = SpeedFastest + endFuzz = SpeedBestCompression + + // Also tests with dictionaries... + testDicts = true + + // Max input size: + maxSize = 1 << 20 + ) + + var dec *Decoder + var encs [SpeedBestCompression + 1]*Encoder + var encsD [SpeedBestCompression + 1]*Encoder + + var dicts [][]byte + if testDicts { + zr := testCreateZipReader("testdata/dict-tests-small.zip", f) + dicts = readDicts(f, zr) + } + + initEnc := func() func() { + var err error + dec, err = NewReader(nil, WithDecoderConcurrency(2), WithDecoderDicts(dicts...), WithDecoderMaxWindow(128<<10), WithDecoderMaxMemory(maxSize)) + if err != nil { + panic(err) + } + for level := startFuzz; level <= endFuzz; level++ { + encs[level], err = NewWriter(nil, WithEncoderCRC(true), WithEncoderLevel(level), WithEncoderConcurrency(2), WithWindowSize(128<<10), WithZeroFrames(true), WithLowerEncoderMem(true)) + if testDicts { + encsD[level], err = NewWriter(nil, WithEncoderCRC(true), WithEncoderLevel(level), WithEncoderConcurrency(2), WithWindowSize(128<<10), WithZeroFrames(true), WithEncoderDict(dicts[0]), WithLowerEncoderMem(true), WithLowerEncoderMem(true)) + } + } + return func() { + dec.Close() + for _, enc := range encs { + if enc != nil { + enc.Close() + } + } + if testDicts { + for _, enc := range encsD { + if enc != nil { + enc.Close() + } + } + } + } + } + + f.Cleanup(initEnc()) + + var dst bytes.Buffer + + f.Fuzz(func(t *testing.T, data []byte) { + // Just test if we crash... + defer func() { + if r := recover(); r != nil { + rdebug.PrintStack() + t.Fatal(r) + } + }() + if len(data) > maxSize { + return + } + var bufSize = len(data) + if bufSize > 2 { + // Make deterministic size + bufSize = int(data[0]) | int(data[1])<<8 + if bufSize >= len(data) { + bufSize = len(data) / 2 + } + } + + for level := startFuzz; level <= endFuzz; level++ { + enc := encs[level] + dst.Reset() + enc.Reset(&dst) + n, err := enc.Write(data) + if err != nil { + t.Fatal(err) + } + if n != len(data) { + t.Fatal(fmt.Sprintln("Level", level, "Short write, got:", n, "want:", len(data))) + } + + encoded := enc.EncodeAll(data, make([]byte, 0, bufSize)) + got, err := dec.DecodeAll(encoded, make([]byte, 0, bufSize)) + if err != nil { + t.Fatal(fmt.Sprintln("Level", level, "DecodeAll error:", err, "\norg:", len(data), "\nencoded", len(encoded))) + } + if !bytes.Equal(got, data) { + t.Fatal(fmt.Sprintln("Level", level, "DecodeAll output mismatch\n", len(got), "org: \n", len(data), "(want)", "\nencoded:", len(encoded))) + } + + err = enc.Close() + if err != nil { + t.Fatal(fmt.Sprintln("Level", level, "Close (buffer) error:", err)) + } + encoded2 := dst.Bytes() + if !bytes.Equal(encoded, encoded2) { + got, err = dec.DecodeAll(encoded2, got[:0]) + if err != nil { + t.Fatal(fmt.Sprintln("Level", level, "DecodeAll (buffer) error:", err, "\norg:", len(data), "\nencoded", len(encoded2))) + } + if !bytes.Equal(got, data) { + t.Fatal(fmt.Sprintln("Level", level, "DecodeAll (buffer) output mismatch\n", len(got), "org: \n", len(data), "(want)", "\nencoded:", len(encoded2))) + } + } + if !testDicts { + continue + } + enc = encsD[level] + dst.Reset() + enc.Reset(&dst) + n, err = enc.Write(data) + if err != nil { + t.Fatal(err) + } + if n != len(data) { + t.Fatal(fmt.Sprintln("Dict Level", level, "Short write, got:", n, "want:", len(data))) + } + + encoded = enc.EncodeAll(data, encoded[:0]) + got, err = dec.DecodeAll(encoded, got[:0]) + if err != nil { + t.Fatal(fmt.Sprintln("Dict Level", level, "DecodeAll error:", err, "\norg:", len(data), "\nencoded", len(encoded))) + } + if !bytes.Equal(got, data) { + t.Fatal(fmt.Sprintln("Dict Level", level, "DecodeAll output mismatch\n", len(got), "org: \n", len(data), "(want)", "\nencoded:", len(encoded))) + } + + err = enc.Close() + if err != nil { + t.Fatal(fmt.Sprintln("Dict Level", level, "Close (buffer) error:", err)) + } + encoded2 = dst.Bytes() + if !bytes.Equal(encoded, encoded2) { + got, err = dec.DecodeAll(encoded2, got[:0]) + if err != nil { + t.Fatal(fmt.Sprintln("Dict Level", level, "DecodeAll (buffer) error:", err, "\norg:", len(data), "\nencoded", len(encoded2))) + } + if !bytes.Equal(got, data) { + t.Fatal(fmt.Sprintln("Dict Level", level, "DecodeAll (buffer) output mismatch\n", len(got), "org: \n", len(data), "(want)", "\nencoded:", len(encoded2))) + } + } + } + }) +} + +func addBytesFromZip(f *testing.F, filename string, raw bool) { + file, err := os.Open(filename) + if err != nil { + f.Fatal(err) + } + fi, err := file.Stat() + if err != nil { + f.Fatal(err) + } + zr, err := zip.NewReader(file, fi.Size()) + if err != nil { + f.Fatal(err) + } + for i, file := range zr.File { + if testing.Short() && i%10 != 0 { + continue + } + rc, err := file.Open() + if err != nil { + f.Fatal(err) + } + + b, err := io.ReadAll(rc) + if err != nil { + f.Fatal(err) + } + rc.Close() + raw := raw + if bytes.HasPrefix(b, []byte("go test fuzz")) { + raw = false + } + if raw { + f.Add(b) + continue + } + vals, err := unmarshalCorpusFile(b) + if err != nil { + f.Fatal(err) + } + for _, v := range vals { + f.Add(v) + } + } +} + +// unmarshalCorpusFile decodes corpus bytes into their respective values. +func unmarshalCorpusFile(b []byte) ([][]byte, error) { + if len(b) == 0 { + return nil, fmt.Errorf("cannot unmarshal empty string") + } + lines := bytes.Split(b, []byte("\n")) + if len(lines) < 2 { + return nil, fmt.Errorf("must include version and at least one value") + } + var vals = make([][]byte, 0, len(lines)-1) + for _, line := range lines[1:] { + line = bytes.TrimSpace(line) + if len(line) == 0 { + continue + } + v, err := parseCorpusValue(line) + if err != nil { + return nil, fmt.Errorf("malformed line %q: %v", line, err) + } + vals = append(vals, v) + } + return vals, nil +} + +// parseCorpusValue +func parseCorpusValue(line []byte) ([]byte, error) { + fs := token.NewFileSet() + expr, err := parser.ParseExprFrom(fs, "(test)", line, 0) + if err != nil { + return nil, err + } + call, ok := expr.(*ast.CallExpr) + if !ok { + return nil, fmt.Errorf("expected call expression") + } + if len(call.Args) != 1 { + return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args)) + } + arg := call.Args[0] + + if arrayType, ok := call.Fun.(*ast.ArrayType); ok { + if arrayType.Len != nil { + return nil, fmt.Errorf("expected []byte or primitive type") + } + elt, ok := arrayType.Elt.(*ast.Ident) + if !ok || elt.Name != "byte" { + return nil, fmt.Errorf("expected []byte") + } + lit, ok := arg.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return nil, fmt.Errorf("string literal required for type []byte") + } + s, err := strconv.Unquote(lit.Value) + if err != nil { + return nil, err + } + return []byte(s), nil + } + return nil, fmt.Errorf("expected []byte") +} diff --git a/zstd/seqdec_amd64.s b/zstd/seqdec_amd64.s index 71e64e0612..27e76774ca 100644 --- a/zstd/seqdec_amd64.s +++ b/zstd/seqdec_amd64.s @@ -52,34 +52,46 @@ sequenceDecs_decode_amd64_fill_byte_by_byte: sequenceDecs_decode_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 16(R10) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_of_update_zero: + MOVQ AX, 16(R10) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 8(R10) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ml_update_zero: + MOVQ AX, 8(R10) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -107,19 +119,25 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte: sequenceDecs_decode_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, (R10) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ll_update_zero: + MOVQ AX, (R10) // Fill bitreader for state updates MOVQ R14, (SP) @@ -198,7 +216,7 @@ sequenceDecs_decode_amd64_skip_update: MOVQ R12, R13 MOVQ R11, R12 MOVQ CX, R11 - JMP sequenceDecs_decode_amd64_adjust_end + JMP sequenceDecs_decode_amd64_after_adjust sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: CMPQ (R10), $0x00000000 @@ -210,7 +228,7 @@ sequenceDecs_decode_amd64_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero MOVQ R11, CX - JMP sequenceDecs_decode_amd64_adjust_end + JMP sequenceDecs_decode_amd64_after_adjust sequenceDecs_decode_amd64_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -247,7 +265,7 @@ sequenceDecs_decode_amd64_adjust_temp_valid: MOVQ AX, R11 MOVQ AX, CX -sequenceDecs_decode_amd64_adjust_end: +sequenceDecs_decode_amd64_after_adjust: MOVQ CX, 16(R10) // Check values @@ -356,49 +374,67 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte: sequenceDecs_decode_56_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 16(R10) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_of_update_zero: + MOVQ AX, 16(R10) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 8(R10) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ml_update_zero: + MOVQ AX, 8(R10) // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, (R10) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ll_update_zero: + MOVQ AX, (R10) // Fill bitreader for state updates MOVQ R14, (SP) @@ -477,7 +513,7 @@ sequenceDecs_decode_56_amd64_skip_update: MOVQ R12, R13 MOVQ R11, R12 MOVQ CX, R11 - JMP sequenceDecs_decode_56_amd64_adjust_end + JMP sequenceDecs_decode_56_amd64_after_adjust sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: CMPQ (R10), $0x00000000 @@ -489,7 +525,7 @@ sequenceDecs_decode_56_amd64_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero MOVQ R11, CX - JMP sequenceDecs_decode_56_amd64_adjust_end + JMP sequenceDecs_decode_56_amd64_after_adjust sequenceDecs_decode_56_amd64_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -526,7 +562,7 @@ sequenceDecs_decode_56_amd64_adjust_temp_valid: MOVQ AX, R11 MOVQ AX, CX -sequenceDecs_decode_56_amd64_adjust_end: +sequenceDecs_decode_56_amd64_after_adjust: MOVQ CX, 16(R10) // Check values @@ -757,7 +793,7 @@ sequenceDecs_decode_bmi2_skip_update: MOVQ R11, R12 MOVQ R10, R11 MOVQ CX, R10 - JMP sequenceDecs_decode_bmi2_adjust_end + JMP sequenceDecs_decode_bmi2_after_adjust sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: CMPQ (R9), $0x00000000 @@ -769,7 +805,7 @@ sequenceDecs_decode_bmi2_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero MOVQ R10, CX - JMP sequenceDecs_decode_bmi2_adjust_end + JMP sequenceDecs_decode_bmi2_after_adjust sequenceDecs_decode_bmi2_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -806,7 +842,7 @@ sequenceDecs_decode_bmi2_adjust_temp_valid: MOVQ R13, R10 MOVQ R13, CX -sequenceDecs_decode_bmi2_adjust_end: +sequenceDecs_decode_bmi2_after_adjust: MOVQ CX, 16(R9) // Check values @@ -1012,7 +1048,7 @@ sequenceDecs_decode_56_bmi2_skip_update: MOVQ R11, R12 MOVQ R10, R11 MOVQ CX, R10 - JMP sequenceDecs_decode_56_bmi2_adjust_end + JMP sequenceDecs_decode_56_bmi2_after_adjust sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: CMPQ (R9), $0x00000000 @@ -1024,7 +1060,7 @@ sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero MOVQ R10, CX - JMP sequenceDecs_decode_56_bmi2_adjust_end + JMP sequenceDecs_decode_56_bmi2_after_adjust sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -1061,7 +1097,7 @@ sequenceDecs_decode_56_bmi2_adjust_temp_valid: MOVQ R13, R10 MOVQ R13, CX -sequenceDecs_decode_56_bmi2_adjust_end: +sequenceDecs_decode_56_bmi2_after_adjust: MOVQ CX, 16(R9) // Check values @@ -1749,6 +1785,10 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 72(AX), DI MOVQ 80(AX), R8 MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) MOVQ 112(AX), R10 MOVQ 128(AX), CX MOVQ CX, 32(SP) @@ -1798,34 +1838,46 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 8(SP) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_of_update_zero: + MOVQ AX, 8(SP) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 16(SP) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ml_update_zero: + MOVQ AX, 16(SP) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -1853,19 +1905,25 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 24(SP) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ll_update_zero: + MOVQ AX, 24(SP) // Fill bitreader for state updates MOVQ R13, (SP) @@ -1945,7 +2003,7 @@ sequenceDecs_decodeSync_amd64_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_amd64_adjust_end + JMP sequenceDecs_decodeSync_amd64_after_adjust sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -1957,7 +2015,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_amd64_adjust_end + JMP sequenceDecs_decodeSync_amd64_after_adjust sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: MOVQ R13, AX @@ -1966,8 +2024,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, AX CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(AX*8), R14 + ADDQ 144(CX)(AX*8), R14 JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid MOVQ $0x00000001, R14 @@ -1983,7 +2040,7 @@ sequenceDecs_decodeSync_amd64_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_amd64_adjust_end: +sequenceDecs_decodeSync_amd64_after_adjust: MOVQ R13, 8(SP) // Check values @@ -2280,6 +2337,10 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 72(CX), SI MOVQ 80(CX), DI MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) MOVQ 112(CX), R9 MOVQ 128(CX), R10 MOVQ R10, 32(SP) @@ -2452,7 +2513,7 @@ sequenceDecs_decodeSync_bmi2_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_bmi2_adjust_end + JMP sequenceDecs_decodeSync_bmi2_after_adjust sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -2464,7 +2525,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_bmi2_adjust_end + JMP sequenceDecs_decodeSync_bmi2_after_adjust sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: MOVQ R13, R12 @@ -2473,8 +2534,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, R12 CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(R12*8), R14 + ADDQ 144(CX)(R12*8), R14 JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid MOVQ $0x00000001, R14 @@ -2490,7 +2550,7 @@ sequenceDecs_decodeSync_bmi2_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_bmi2_adjust_end: +sequenceDecs_decodeSync_bmi2_after_adjust: MOVQ R13, 8(SP) // Check values @@ -2787,6 +2847,10 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 72(AX), DI MOVQ 80(AX), R8 MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) MOVQ 112(AX), R10 MOVQ 128(AX), CX MOVQ CX, 32(SP) @@ -2836,34 +2900,46 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 8(SP) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_of_update_zero: + MOVQ AX, 8(SP) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 16(SP) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ml_update_zero: + MOVQ AX, 16(SP) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -2891,19 +2967,25 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 24(SP) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ll_update_zero: + MOVQ AX, 24(SP) // Fill bitreader for state updates MOVQ R13, (SP) @@ -2983,7 +3065,7 @@ sequenceDecs_decodeSync_safe_amd64_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -2995,7 +3077,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: MOVQ R13, AX @@ -3004,8 +3086,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, AX CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(AX*8), R14 + ADDQ 144(CX)(AX*8), R14 JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid MOVQ $0x00000001, R14 @@ -3021,7 +3102,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_safe_amd64_adjust_end: +sequenceDecs_decodeSync_safe_amd64_after_adjust: MOVQ R13, 8(SP) // Check values @@ -3420,6 +3501,10 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 72(CX), SI MOVQ 80(CX), DI MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) MOVQ 112(CX), R9 MOVQ 128(CX), R10 MOVQ R10, 32(SP) @@ -3592,7 +3677,7 @@ sequenceDecs_decodeSync_safe_bmi2_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -3604,7 +3689,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: MOVQ R13, R12 @@ -3613,8 +3698,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, R12 CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(R12*8), R14 + ADDQ 144(CX)(R12*8), R14 JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid MOVQ $0x00000001, R14 @@ -3630,7 +3714,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_safe_bmi2_adjust_end: +sequenceDecs_decodeSync_safe_bmi2_after_adjust: MOVQ R13, 8(SP) // Check values diff --git a/zstd/testdata/fuzz/decode-corpus-encoded.zip b/zstd/testdata/fuzz/decode-corpus-encoded.zip new file mode 100644 index 0000000000..ca3f5bc5a3 Binary files /dev/null and b/zstd/testdata/fuzz/decode-corpus-encoded.zip differ diff --git a/zstd/testdata/fuzz/decode-corpus-raw.zip b/zstd/testdata/fuzz/decode-corpus-raw.zip new file mode 100644 index 0000000000..8314d3f63b Binary files /dev/null and b/zstd/testdata/fuzz/decode-corpus-raw.zip differ diff --git a/zstd/testdata/fuzz/encode-corpus-encoded.zip b/zstd/testdata/fuzz/encode-corpus-encoded.zip new file mode 100644 index 0000000000..5f2ae9aab6 Binary files /dev/null and b/zstd/testdata/fuzz/encode-corpus-encoded.zip differ diff --git a/zstd/testdata/fuzz/encode-corpus-raw.zip b/zstd/testdata/fuzz/encode-corpus-raw.zip new file mode 100644 index 0000000000..4b34bcfe8c Binary files /dev/null and b/zstd/testdata/fuzz/encode-corpus-raw.zip differ diff --git a/zstd/zstd_test.go b/zstd/zstd_test.go index cd1f3b1a73..0278d49c45 100644 --- a/zstd/zstd_test.go +++ b/zstd/zstd_test.go @@ -16,12 +16,12 @@ var isRaceTest bool func TestMain(m *testing.M) { ec := m.Run() - if ec == 0 && runtime.NumGoroutine() > 1 { + if ec == 0 && runtime.NumGoroutine() > 2 { n := 0 - for n < 60 { + for n < 15 { n++ time.Sleep(time.Second) - if runtime.NumGoroutine() == 1 { + if runtime.NumGoroutine() == 2 { os.Exit(0) } }