Skip to content

Commit

Permalink
[skip ci] Resize out if needed
Browse files Browse the repository at this point in the history
  • Loading branch information
WojciechMula committed Apr 1, 2022
1 parent ab85976 commit b1f1d93
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 60 deletions.
101 changes: 79 additions & 22 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ const errorMatchLenTooBig = 2
// error reported when mo > t or mo > s.windowSize
const errorMatchOffTooBig = 3

// error reported by decodeSync when out buffer is too small
const errorOutOfCapacity = 4

const maxMatchLen = 131074

func main() {
Expand All @@ -55,7 +58,7 @@ func main() {
o.bmi2 = true
o.genDecodeSeqAsm("sequenceDecs_decode_bmi2")

exec := executeSimple{}
exec := executeSimple{useSeqs: true}
exec.generateProcedure("sequenceDecs_executeSimple_amd64")

decodeSync := decodeSync{}
Expand Down Expand Up @@ -120,12 +123,12 @@ func (o options) genDecodeSeqAsm(name string) {
Doc(name+" decodes a sequence", "")
Pragma("noescape")

nop := func(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr Mem) {}
nop := func(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr, outCapPtr Mem) {}

o.generateBody(name, nop)
}

func (o options) generateBody(name string, executeSingleTriple func(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr Mem)) {
func (o options) generateBody(name string, executeSingleTriple func(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr, outCapPtr Mem)) {
// for decode
brValue := GP64()
brBitsRead := GP64()
Expand Down Expand Up @@ -153,36 +156,58 @@ func (o options) generateBody(name string, executeSingleTriple func(literals, ou
ADDQ(brOffset, brPointer) // Add current offset to read pointer.
MOVQ(brPointer, brPointerStash)
}
var moP Mem
var mlP Mem
var llP Mem
var outCapPtr Mem

{
ctx := Dereference(Param("ctx"))
Load(ctx.Field("llState"), llState)
Load(ctx.Field("mlState"), mlState)
Load(ctx.Field("ofState"), ofState)
if o.useSeqs {
Load(ctx.Field("seqs").Base(), seqBase)

moP = Mem{Base: seqBase, Disp: 2 * 8} // Pointer to current mo
mlP = Mem{Base: seqBase, Disp: 1 * 8} // Pointer to current ml
llP = Mem{Base: seqBase, Disp: 0 * 8} // Pointer to current ll
} else {
moP = AllocLocal(8)
mlP = AllocLocal(8)
llP = AllocLocal(8)
outCapPtr = AllocLocal(8)

Load(ctx.Field("out").Base(), outBase)
Load(ctx.Field("literals").Base(), literals)
Load(ctx.Field("outPosition"), outPosition)
Load(ctx.Field("windowSize"), windowSize)

tmp := GP64()
Load(ctx.Field("out").Cap(), tmp)
MOVQ(tmp, outCapPtr)

Comment("outBase += outPosition")
ADDQ(outPosition, outBase)
}
}

var moP Mem
var mlP Mem
var llP Mem

if o.useSeqs {
moP = Mem{Base: seqBase, Disp: 2 * 8} // Pointer to current mo
mlP = Mem{Base: seqBase, Disp: 1 * 8} // Pointer to current ml
llP = Mem{Base: seqBase, Disp: 0 * 8} // Pointer to current ll
} else {
moP = AllocLocal(8)
mlP = AllocLocal(8)
llP = AllocLocal(8)
Comment("Check if we're retrying after `out` resize")
retry, err := ctx.Field("retry").Resolve()
if err != nil {
panic(err)
}
CMPQ(retry.Addr, U8(1))
JNE(LabelRef(name + "_main_loop"))

tmp = GP64()
Load(ctx.Field("ll"), tmp)
MOVQ(tmp, llP)
Load(ctx.Field("mo"), tmp)
MOVQ(tmp, moP)
Load(ctx.Field("ml"), tmp)
MOVQ(tmp, mlP)

JMP(LabelRef("execute_single_triple"))
}
}

// MAIN LOOP:
Expand Down Expand Up @@ -294,8 +319,9 @@ func (o options) generateBody(name string, executeSingleTriple func(literals, ou
}

Label(name + "_match_len_ofs_ok")
Label("execute_single_triple")

executeSingleTriple(literals, outBase, outPosition, windowSize, llP, moP, mlP)
executeSingleTriple(literals, outBase, outPosition, windowSize, llP, moP, mlP, outCapPtr)

Label("handle_loop")
ADDQ(U8(24), seqBase) // sizeof(seqVals) == 3*8
Expand Down Expand Up @@ -357,14 +383,31 @@ func (o options) generateBody(name string, executeSingleTriple func(literals, ou
{
Label("error_match_off_too_big")
if !o.useSeqs {
ctx := Dereference(Param("ctx"))
tmp := GP64()
MOVQ(moP, tmp)
ctx := Dereference(Param("ctx"))
Store(tmp, ctx.Field("mo"))
Store(outPosition, ctx.Field("outPosition"))
}
o.returnWithCode(errorMatchOffTooBig)
}

if !o.useSeqs {
Comment("Return request to resize `out` by at least ll + ml bytes")
Label("error_out_of_capacity")
ctx := Dereference(Param("ctx"))
tmp := GP64()
MOVQ(llP, tmp)
Store(tmp, ctx.Field("ll"))
MOVQ(mlP, tmp)
Store(tmp, ctx.Field("ml"))
Store(outPosition, ctx.Field("outPosition"))

br := Dereference(Param("br"))
Store(brValue, br.Field("value"))
Store(brBitsRead.As8(), br.Field("bitsRead"))
Store(brOffset, br.Field("off"))
}
}

func (o options) returnWithCode(returnCode uint32) {
Expand Down Expand Up @@ -650,7 +693,9 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual)
return offset
}

type executeSimple struct{}
// executeSimple emits the code that executes a single decoded sequence
// (see executeSingleTriple).
//
// useSeqs selects the variant being generated: when it is false,
// executeSingleTriple first emits a check of ll + ml against the capacity
// of `out` and jumps to the error_out_of_capacity label when the sum is
// larger; when it is true that check is not emitted.
type executeSimple struct {
useSeqs bool
}

// copySize returns register size used to fast copy.
//
Expand Down Expand Up @@ -706,8 +751,9 @@ func (e executeSimple) generateBody(name string) {
moPtr := Mem{Base: seqsBase, Disp: 2 * 8}
mlPtr := Mem{Base: seqsBase, Disp: 1 * 8}
llPtr := Mem{Base: seqsBase, Disp: 0 * 8}
var outCapPtr Mem // Note: unused in this case

e.executeSingleTriple(literals, outBase, outPosition, windowSize, llPtr, moPtr, mlPtr)
e.executeSingleTriple(literals, outBase, outPosition, windowSize, llPtr, moPtr, mlPtr, outCapPtr)

Label("handle_loop")
ADDQ(U8(24), seqsBase) // seqs += sizeof(seqVals)
Expand Down Expand Up @@ -749,7 +795,18 @@ func (e executeSimple) generateBody(name string) {
RET()
}

func (e executeSimple) executeSingleTriple(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr Mem) {
func (e executeSimple) executeSingleTriple(literals, outBase, outPosition, windowSize reg.GPVirtual, llPtr, moPtr, mlPtr, outCapPtr Mem) {
if !e.useSeqs {
Comment("Check if ll + ml < cap(out)")
capacity := GP64()
MOVQ(outCapPtr, capacity)
sum := GP64()
MOVQ(llPtr, sum)
ADDQ(mlPtr, sum)
CMPQ(sum, capacity)
JA(LabelRef("error_out_of_capacity"))
}

Comment("Copy literals")
Label("copy_literals")
{
Expand Down
47 changes: 37 additions & 10 deletions zstd/seqdec_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ type decodeSyncAsmContext struct {
literals []byte
litPosition int
windowSize int
ml int // set on error
mo int // set on error
retry bool // set by the caller when `out` got resized after reporting errorOutOfCapacity
ll int // set on error
ml int // set on error
mo int // set on error
}

// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
Expand Down Expand Up @@ -68,14 +70,35 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {

s.seqSize = 0

var errCode int
if cpuinfo.HasBMI2() {
errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
}
if errCode != 0 {
for true {
var errCode int
if cpuinfo.HasBMI2() {
errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
}
if errCode == 0 {
break
}

switch errCode {
case errorOutOfCapacity:
// Not enough capacity in `out`. This can happen under high-volume block
// streaming conditions, or when the destination slice is too small for
// sync operations. Over-allocating here can create a large amount of GC
// pressure, so we try to keep the growth as contained as possible.
used := ctx.outPosition
addBytes := 256 + ctx.ll + ctx.ml + used>>2
// Clamp to max block size.
if used+addBytes > maxBlockSize {
addBytes = maxBlockSize - used
}
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]

ctx.out = s.out
ctx.retry = true

case errorMatchLenOfsMismatch:
return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)

Expand All @@ -84,9 +107,10 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {

case errorMatchOffTooBig:
return true, fmt.Errorf("XXX: match offset (%d) bigger than max allowed length (%d)", ctx.mo, ctx.outPosition)
default:
return true, fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
}

return true, fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
}

if ctx.litRemain < 0 {
Expand Down Expand Up @@ -146,6 +170,9 @@ const errorMatchLenTooBig = 2
// error reported when mo > t or mo > s.windowSize
const errorMatchOffTooBig = 3

// error reported by decodeSync when out buffer is too small
const errorOutOfCapacity = 4

// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
Expand Down

0 comments on commit b1f1d93

Please sign in to comment.