Skip to content

Commit

Permalink
[skip ci] Review fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
WojciechMula committed May 12, 2022
1 parent a690f36 commit af78b59
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 102 deletions.
17 changes: 7 additions & 10 deletions zstd/_generate/gen.go
Expand Up @@ -302,28 +302,25 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
// Update states
Comment("Update Offset State")
{
nBits := GP64()
nBits := ofState // Note: SHRXQ uses lower 6 bits of shift amount and BZHIQ lower 8 bits of count
lowBits := GP64()
MOVBQZX(ofState.As8(), nBits)
BZHIQ(nBits, bits, lowBits) // lowBits = bits & ((1 << nBits) - 1)
SHRXQ(nBits, bits, bits) // bits >= nBits
SHRXQ(nBits, bits, bits) // bits >>= nBits
o.nextState(name+"_ofState", ofState, lowBits, "ofTable")
}
Comment("Update Match Length State")
{
nBits := GP64()
nBits := mlState
lowBits := GP64()
MOVBQZX(mlState.As8(), nBits)
BZHIQ(nBits, bits, lowBits) // lowBits = lowBits & ((1 << nBits) - 1))
SHRXQ(nBits, bits, bits) // lowBits >= nBits
BZHIQ(nBits, bits, lowBits) // lowBits = bits & ((1 << nBits) - 1)
SHRXQ(nBits, bits, bits) // bits >>= nBits
o.nextState(name+"_mlState", mlState, lowBits, "mlTable")
}
Comment("Update Literal Length State")
{
nBits := GP64()
nBits := llState
lowBits := GP64()
MOVBQZX(llState.As8(), nBits)
BZHIQ(nBits, bits, lowBits) // lowBits = lowBits & ((1 << nBits) - 1))
BZHIQ(nBits, bits, lowBits) // lowBits = bits & ((1 << nBits) - 1)
o.nextState(name+"_llState", llState, lowBits, "llTable")
}
} else {
Expand Down
172 changes: 80 additions & 92 deletions zstd/seqdec_amd64.s
Expand Up @@ -721,37 +721,34 @@ sequenceDecs_decode_bmi2_fill_2_end:
BZHIQ R14, R15, R15

// Update Offset State
MOVBQZX R8, CX
BZHIQ CX, R15, R14
SHRXQ CX, R15, R15
MOVQ $0x00001010, CX
BEXTRQ CX, R8, CX
ADDQ R14, CX
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8

// Load ctx.ofTable
MOVQ ctx+16(FP), R8
MOVQ 48(R8), R8
MOVQ (R8)(CX*8), R8
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8

// Update Match Length State
MOVBQZX DI, CX
BZHIQ CX, R15, R14
SHRXQ CX, R15, R15
MOVQ $0x00001010, CX
BEXTRQ CX, DI, CX
ADDQ R14, CX
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI

// Load ctx.mlTable
MOVQ ctx+16(FP), DI
MOVQ 24(DI), DI
MOVQ (DI)(CX*8), DI
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI

// Update Literal Length State
MOVBQZX SI, CX
BZHIQ CX, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI

// Load ctx.llTable
MOVQ ctx+16(FP), CX
Expand Down Expand Up @@ -979,37 +976,34 @@ sequenceDecs_decode_56_bmi2_fill_end:
BZHIQ R14, R15, R15

// Update Offset State
MOVBQZX R8, CX
BZHIQ CX, R15, R14
SHRXQ CX, R15, R15
MOVQ $0x00001010, CX
BEXTRQ CX, R8, CX
ADDQ R14, CX
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8

// Load ctx.ofTable
MOVQ ctx+16(FP), R8
MOVQ 48(R8), R8
MOVQ (R8)(CX*8), R8
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8

// Update Match Length State
MOVBQZX DI, CX
BZHIQ CX, R15, R14
SHRXQ CX, R15, R15
MOVQ $0x00001010, CX
BEXTRQ CX, DI, CX
ADDQ R14, CX
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI

// Load ctx.mlTable
MOVQ ctx+16(FP), DI
MOVQ 24(DI), DI
MOVQ (DI)(CX*8), DI
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI

// Update Literal Length State
MOVBQZX SI, CX
BZHIQ CX, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI

// Load ctx.llTable
MOVQ ctx+16(FP), CX
Expand Down Expand Up @@ -2277,37 +2271,34 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
BZHIQ R13, R14, R14

// Update Offset State
MOVBQZX R8, CX
BZHIQ CX, R14, R13
SHRXQ CX, R14, R14
MOVQ $0x00001010, CX
BEXTRQ CX, R8, CX
ADDQ R13, CX
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8

// Load ctx.ofTable
MOVQ ctx+16(FP), R8
MOVQ 48(R8), R8
MOVQ (R8)(CX*8), R8
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8

// Update Match Length State
MOVBQZX DI, CX
BZHIQ CX, R14, R13
SHRXQ CX, R14, R14
MOVQ $0x00001010, CX
BEXTRQ CX, DI, CX
ADDQ R13, CX
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI

// Load ctx.mlTable
MOVQ ctx+16(FP), DI
MOVQ 24(DI), DI
MOVQ (DI)(CX*8), DI
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI

// Update Literal Length State
MOVBQZX SI, CX
BZHIQ CX, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI

// Load ctx.llTable
MOVQ ctx+16(FP), CX
Expand Down Expand Up @@ -3310,37 +3301,34 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
BZHIQ R13, R14, R14

// Update Offset State
MOVBQZX R8, CX
BZHIQ CX, R14, R13
SHRXQ CX, R14, R14
MOVQ $0x00001010, CX
BEXTRQ CX, R8, CX
ADDQ R13, CX
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8

// Load ctx.ofTable
MOVQ ctx+16(FP), R8
MOVQ 48(R8), R8
MOVQ (R8)(CX*8), R8
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8

// Update Match Length State
MOVBQZX DI, CX
BZHIQ CX, R14, R13
SHRXQ CX, R14, R14
MOVQ $0x00001010, CX
BEXTRQ CX, DI, CX
ADDQ R13, CX
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI

// Load ctx.mlTable
MOVQ ctx+16(FP), DI
MOVQ 24(DI), DI
MOVQ (DI)(CX*8), DI
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI

// Update Literal Length State
MOVBQZX SI, CX
BZHIQ CX, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI

// Load ctx.llTable
MOVQ ctx+16(FP), CX
Expand Down

0 comments on commit af78b59

Please sign in to comment.