Skip to content

Commit

Permalink
zstd: Store previous offsets in registers (#548)
Browse files Browse the repository at this point in the history
The speedup is barely visible in microbenchmarks (presumably because the previous offsets stay in cache), but it is very visible in streaming decodes.

Before/after (in each pair, the first line is before this change and the second is after):

```
BenchmarkDecoderSilesia-32    	       4	 281168525 ns/op	 753.81 MB/s	   50218 B/op	      44 allocs/op
BenchmarkDecoderSilesia-32    	       6	 190584583 ns/op	1112.09 MB/s	   49446 B/op	      45 allocs/op

BenchmarkDecoderEnwik9-32    	       1	1439964200 ns/op	 694.46 MB/s	   71952 B/op	      51 allocs/op
BenchmarkDecoderEnwik9-32    	       1	1184307200 ns/op	 844.38 MB/s	   72144 B/op	      53 allocs/op
```
  • Loading branch information
klauspost committed Apr 4, 2022
1 parent d1c1898 commit 20effe5
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 232 deletions.
103 changes: 58 additions & 45 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,16 @@ func (o options) genDecodeSeqAsm(name string) {
mlP := Mem{Base: seqBase, Disp: 1 * 8} // Pointer to current ml
llP := Mem{Base: seqBase, Disp: 0 * 8} // Pointer to current ll

// Store previous offsets in registers.
var offsets [3]reg.GPVirtual
s := Dereference(Param("s"))
for i := range offsets {
offsets[i] = GP64()
po, _ := s.Field("prevOffset").Index(i).Resolve()

MOVQ(po.Addr, offsets[i])
}

// MAIN LOOP:
Label(name + "_main_loop")

Expand Down Expand Up @@ -209,7 +219,7 @@ func (o options) genDecodeSeqAsm(name string) {

Comment("Adjust offset")

offset := o.adjustOffset(name+"_adjust", moP, llP, R14)
offset := o.adjustOffset(name+"_adjust", moP, llP, R14, &offsets)
MOVQ(offset, moP) // Store offset

Comment("Check values")
Expand Down Expand Up @@ -265,6 +275,13 @@ func (o options) genDecodeSeqAsm(name string) {
DECQ(iterationP.Addr)
JNS(LabelRef(name + "_main_loop"))

// Store offsets
s = Dereference(Param("s"))
for i := range offsets {
po, _ := s.Field("prevOffset").Index(i).Resolve()
MOVQ(offsets[i], po.Addr)
}

// update bitreader state before returning
br := Dereference(Param("br"))
Store(brValue, br.Field("value"))
Expand Down Expand Up @@ -454,12 +471,7 @@ func (o options) getBits(name string, nBits, brValue, brBitsRead reg.GPVirtual,
return BX
}

func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual) (offset reg.GPVirtual) {
s := Dereference(Param("s"))

po0, _ := s.Field("prevOffset").Index(0).Resolve()
po1, _ := s.Field("prevOffset").Index(1).Resolve()
po2, _ := s.Field("prevOffset").Index(2).Resolve()
func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual, offsets *[3]reg.GPVirtual) (offset reg.GPVirtual) {
offset = GP64()
MOVQ(moP, offset)
{
Expand All @@ -472,10 +484,9 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual)
CMPQ(offsetB, U8(1))
JBE(LabelRef(name + "_offsetB_1_or_0"))

tmp := XMM()
MOVUPS(po0.Addr, tmp) // tmp = (s.prevOffset[0], s.prevOffset[1])
MOVQ(offset, po0.Addr) // s.prevOffset[0] = offset
MOVUPS(tmp, po1.Addr) // s.prevOffset[1], s.prevOffset[2] = s.prevOffset[0], s.prevOffset[1]
MOVQ(offsets[1], offsets[2]) // s.prevOffset[2] = s.prevOffset[1]
MOVQ(offsets[0], offsets[1]) // s.prevOffset[1] = s.prevOffset[0]
MOVQ(offset, offsets[0]) // s.prevOffset[0] = offset
JMP(LabelRef(name + "_end"))
}

Expand Down Expand Up @@ -504,7 +515,7 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual)
Label(name + "_offset_maybezero")
TESTQ(offset, offset)
JNZ(LabelRef(name + "_offset_nonzero"))
MOVQ(po0.Addr, offset)
MOVQ(offsets[0], offset)
JMP(LabelRef(name + "_end"))
}
}
Expand All @@ -515,31 +526,34 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual)
// } else {
// temp = s.prevOffset[offset]
// }
//
// this if got transformed into:
//
// ofs := offset
// shift := 0
// if offset == 3 {
// ofs = 0
// shift = -1
// }
// temp := s.prevOffset[ofs] + shift
// TODO: This should be easier...
CX, DX, R15 := GP64(), GP64(), GP64()
MOVQ(offset, CX)
XORQ(DX, DX)
MOVQ(I32(-1), R15)
CMPQ(offset, U8(3))
CMOVQEQ(DX, CX)
CMOVQEQ(R15, DX)
prevOffset := GP64()
LEAQ(po0.Addr, prevOffset) // &prevOffset[0]
ADDQ(Mem{Base: prevOffset, Index: CX, Scale: 8}, DX)
temp := DX
temp := GP64()
CMPQ(offset, U8(1))
JB(LabelRef(name + "_zero"))
JEQ(LabelRef(name + "_one"))
CMPQ(offset, U8(2))
JA(LabelRef(name + "_three"))
JMP(LabelRef(name + "_two"))

Label(name + "_zero")
MOVQ(offsets[0], temp)
JMP(LabelRef(name + "_test_temp_valid"))

Label(name + "_one")
MOVQ(offsets[1], temp)
JMP(LabelRef(name + "_test_temp_valid"))

Label(name + "_two")
MOVQ(offsets[2], temp)
JMP(LabelRef(name + "_test_temp_valid"))

Label(name + "_three")
LEAQ(Mem{Base: offsets[0], Disp: -1}, temp)

Label(name + "_test_temp_valid")
// if temp == 0 {
// temp = 1
// }
TESTQ(temp, temp)
JNZ(LabelRef(name + "_temp_valid"))
MOVQ(U32(1), temp)

Expand All @@ -548,19 +562,18 @@ func (o options) adjustOffset(name string, moP, llP Mem, offsetB reg.GPVirtual)
// s.prevOffset[2] = s.prevOffset[1]
// }
CMPQ(offset, U8(1))
JZ(LabelRef(name + "_skip"))
tmp := GP64()
MOVQ(po1.Addr, tmp)
MOVQ(tmp, po2.Addr) // s.prevOffset[2] = s.prevOffset[1]

Label(name + "_skip")
if false {
JZ(LabelRef(name + "_skip"))
MOVQ(offsets[1], offsets[2]) // s.prevOffset[2] = s.prevOffset[1]
Label(name + "_skip")
} else {
CMOVQNE(offsets[1], offsets[2])
}
// s.prevOffset[1] = s.prevOffset[0]
// s.prevOffset[0] = temp
tmp = GP64()
MOVQ(po0.Addr, tmp)
MOVQ(tmp, po1.Addr) // s.prevOffset[1] = s.prevOffset[0]
MOVQ(temp, po0.Addr) // s.prevOffset[0] = temp
MOVQ(temp, offset) // return temp
MOVQ(offsets[0], offsets[1])
MOVQ(temp, offsets[0])
MOVQ(temp, offset) // return temp
}
Label(name + "_end")
return offset
Expand Down

0 comments on commit 20effe5

Please sign in to comment.