Skip to content

Commit

Permalink
zstd: extend executeSimple with history support (#542)
Browse files Browse the repository at this point in the history
* zstd: extend executeSimple with history support
  • Loading branch information
WojciechMula committed Apr 4, 2022
1 parent 0309165 commit c4e0096
Show file tree
Hide file tree
Showing 5 changed files with 329 additions and 80 deletions.
162 changes: 145 additions & 17 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ const errorMatchLenTooBig = 2

const maxMatchLen = 131074

// size of struct seqVals
const seqValsSize = 24

func main() {
flag.Parse()
out := flag.Lookup("out")
Expand Down Expand Up @@ -129,6 +132,8 @@ func (o options) genDecodeSeqAsm(name string) {
ADDQ(brOffset, brPointer) // Add current offset to read pointer.
MOVQ(brPointer, brPointerStash)
}

// 2. load states (done once)
{
ctx := Dereference(Param("ctx"))
Load(ctx.Field("llState"), llState)
Expand Down Expand Up @@ -250,7 +255,7 @@ func (o options) genDecodeSeqAsm(name string) {
}

Label(name + "_match_len_ofs_ok")
ADDQ(U8(24), seqBase) // sizof(seqVals) == 3*8
ADDQ(U8(seqValsSize), seqBase)
ctx = Dereference(Param("ctx"))
iterationP, err := ctx.Field("iteration").Resolve()
if err != nil {
Expand Down Expand Up @@ -584,6 +589,8 @@ func (e executeSimple) generateProcedure(name string) {
literals := GP64()
outPosition := GP64()
windowSize := GP64()
histBase := GP64()
histLen := GP64()

{
ctx := Dereference(Param("ctx"))
Expand All @@ -597,6 +604,10 @@ func (e executeSimple) generateProcedure(name string) {
Load(ctx.Field("literals").Base(), literals)
Load(ctx.Field("outPosition"), outPosition)
Load(ctx.Field("windowSize"), windowSize)
Load(ctx.Field("history").Base(), histBase)
Load(ctx.Field("history").Len(), histLen)

ADDQ(histLen, histBase) // Note: we always copy from &hist[len(hist) - v]

tmp := GP64()
Comment("seqsBase += 24 * seqIndex")
Expand All @@ -618,35 +629,95 @@ func (e executeSimple) generateProcedure(name string) {
mlPtr := Mem{Base: seqsBase, Disp: 1 * 8}
llPtr := Mem{Base: seqsBase, Disp: 0 * 8}

MOVQ(mlPtr, ml)
MOVQ(llPtr, ll)
MOVQ(mlPtr, ml)
MOVQ(moPtr, mo)

// generates the loop tail
handleLoop := func() {
ADDQ(U8(seqValsSize), seqsBase) // seqs += sizeof(seqVals)
INCQ(seqIndex)
CMPQ(seqIndex, seqsLen)
JB(LabelRef("main_loop"))
}

Comment("Copy literals")
Label("copy_literals")
{
TESTQ(ll, ll)
JZ(LabelRef("copy_match"))
JZ(LabelRef("copy_history"))
e.copyMemory("1", literals, outBase, ll)

ADDQ(ll, literals)
ADDQ(ll, outBase)
ADDQ(ll, outPosition)
}

Comment("Copy match")
Comment("Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)")
{
tmp := GP64()
LEAQ(Mem{Base: outPosition, Index: histLen, Scale: 1}, tmp)
CMPQ(mo, tmp)
JG(LabelRef("error_match_off_too_big"))
CMPQ(mo, windowSize)
JG(LabelRef("error_match_off_too_big"))
}

Comment("Copy match from history")
{
Label("copy_history")
v := GP64()
MOVQ(mo, v)
SUBQ(outPosition, v) // v := seq.mo - outPosition
CMPQ(v, U8(0)) // do nothing if v <= 0
JLE(LabelRef("copy_match"))

// v := seq.mo - t; v > 0 {
// start := len(hist) - v
// ...
// }
ptr := GP64()
MOVQ(histBase, ptr)
SUBQ(v, ptr) // ptr := &hist[len(hist) - v]
CMPQ(ml, v)
JGE(LabelRef("copy_all_from_history"))
/* if ml <= v {
copy(out[outPosition:], hist[start:start+seq.ml])
t += seq.ml
continue
}
*/
e.copyMemoryPrecise("4", ptr, outBase, ml)

ADDQ(ml, outPosition)
ADDQ(ml, outBase)
// Note: for the current go tests this branch is taken in 99.53% cases,
// this is why we repeat a little code here.
handleLoop()
JMP(LabelRef("loop_finished"))

Label("copy_all_from_history")
/* if seq.ml > v {
// Some goes into current block.
// Copy remainder of history
copy(out[outPosition:], hist[start:])
outPosition += v
seq.ml -= v
}
*/
e.copyMemoryPrecise("5", ptr, outBase, v)
ADDQ(v, outBase)
ADDQ(v, outPosition)
SUBQ(v, ml)
// fallback to the next block
}

Comment("Copy match from the current buffer")
Label("copy_match")
{
TESTQ(ml, ml)
JZ(LabelRef("handle_loop"))

MOVQ(moPtr, mo)

Comment("Malformed input if seq.mo > t || seq.mo > s.windowSize)")
CMPQ(mo, outPosition)
JG(LabelRef("error_match_off_to_big"))
CMPQ(mo, windowSize)
JG(LabelRef("error_match_off_to_big"))

src := GP64()
MOVQ(outBase, src)
SUBQ(mo, src) // src = &s.out[t - mo]
Expand Down Expand Up @@ -683,10 +754,7 @@ func (e executeSimple) generateProcedure(name string) {
}

Label("handle_loop")
ADDQ(U8(24), seqsBase) // seqs += sizeof(seqVals)
INCQ(seqIndex)
CMPQ(seqIndex, seqsLen)
JB(LabelRef("main_loop"))
handleLoop()

ret, err := ReturnIndex(0).Resolve()
if err != nil {
Expand All @@ -709,10 +777,11 @@ func (e executeSimple) generateProcedure(name string) {
SUBQ(tmp, literals) // litPosition := current - initial literals pointer
Store(literals, ctx.Field("litPosition"))
}
Label("loop_finished")
returnValue(1)
RET()

Label("error_match_off_to_big")
Label("error_match_off_too_big")
returnValue(0)
RET()

Expand Down Expand Up @@ -740,6 +809,65 @@ func (e executeSimple) copyMemory(suffix string, src, dst, length reg.GPVirtual)
JB(LabelRef(label))
}

// copyMemoryPrecise will copy memory in blocks of 16 bytes,
// without overwriting nor overreading.
func (e executeSimple) copyMemoryPrecise(suffix string, src, dst, length reg.GPVirtual) {
label := "copy_" + suffix
ofs := GP64()
s := Mem{Base: src, Index: ofs, Scale: 1}
d := Mem{Base: dst, Index: ofs, Scale: 1}

tmp := GP64()
XORQ(ofs, ofs)

Label("copy_" + suffix + "_byte")
TESTQ(U32(0x1), length)
JZ(LabelRef("copy_" + suffix + "_word"))

// copy one byte if length & 0x01 != 0
MOVB(s, tmp.As8())
MOVB(tmp.As8(), d)
ADDQ(U8(1), ofs)

Label("copy_" + suffix + "_word")
TESTQ(U32(0x2), length)
JZ(LabelRef("copy_" + suffix + "_dword"))

// copy two bytes if length & 0x02 != 0
MOVW(s, tmp.As16())
MOVW(tmp.As16(), d)
ADDQ(U8(2), ofs)

Label("copy_" + suffix + "_dword")
TESTQ(U32(0x4), length)
JZ(LabelRef("copy_" + suffix + "_qword"))

// copy four bytes if length & 0x04 != 0
MOVL(s, tmp.As32())
MOVL(tmp.As32(), d)
ADDQ(U8(4), ofs)

Label("copy_" + suffix + "_qword")
TESTQ(U32(0x8), length)
JZ(LabelRef("copy_" + suffix + "_test"))

// copy eight bytes if length & 0x08 != 0
MOVQ(s, tmp)
MOVQ(tmp, d)
ADDQ(U8(8), ofs)
JMP(LabelRef("copy_" + suffix + "_test"))

// copy in 16-byte chunks
Label(label)
t := XMM()
MOVUPS(s, t)
MOVUPS(t, d)
ADDQ(U8(e.copySize()), ofs)
Label("copy_" + suffix + "_test")
CMPQ(ofs, length)
JB(LabelRef(label))
}

// copyOverlappedMemory will copy one byte at the time from src to dst.
func (e executeSimple) copyOverlappedMemory(suffix string, src, dst, length reg.GPVirtual) {
label := "copy_slow_" + suffix
Expand Down
4 changes: 2 additions & 2 deletions zstd/seqdec.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
if len(hist) == 0 && len(s.dict) == 0 {
return s.executeSimple(seqs)
if len(s.dict) == 0 {
return s.executeSimple(seqs, hist)
}

// Ensure we have enough output size...
Expand Down
8 changes: 5 additions & 3 deletions zstd/seqdec_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ type executeAsmContext struct {
seqs []seqVals
seqIndex int
out []byte
history []byte
literals []byte
outPosition int
litPosition int
Expand All @@ -117,8 +118,8 @@ func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool

const overwriteSize = 16

// executeSimple handles cases when no history nor dictionary are used.
func (s *sequenceDecs) executeSimple(seqs []seqVals) error {
// executeSimple handles cases when dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
if len(s.out)+s.seqSize+overwriteSize > cap(s.out) {
addBytes := s.seqSize + len(s.out) + overwriteSize
Expand All @@ -137,6 +138,7 @@ func (s *sequenceDecs) executeSimple(seqs []seqVals) error {
seqs: seqs,
seqIndex: 0,
out: out,
history: hist,
outPosition: t,
litPosition: 0,
literals: s.literals,
Expand All @@ -146,7 +148,7 @@ func (s *sequenceDecs) executeSimple(seqs []seqVals) error {
ok := sequenceDecs_executeSimple_amd64(&ctx)
if !ok {
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
seqs[ctx.seqIndex].mo, ctx.outPosition)
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
}
s.literals = s.literals[ctx.litPosition:]
t = ctx.outPosition
Expand Down

0 comments on commit c4e0096

Please sign in to comment.