Skip to content

Commit

Permalink
Update comments
Browse files Browse the repository at this point in the history
  • Loading branch information
WojciechMula committed Mar 10, 2022
1 parent dd617a4 commit 0dcdbd0
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 53 deletions.
73 changes: 44 additions & 29 deletions zstd/seqdec_amd64.s
Expand Up @@ -25,34 +25,48 @@ TEXT ·sequenceDecs_decode_amd64(SB), NOSPLIT, $8
/*
This procedure implements the following sequence:
// s.next()
br.fill()
mo, moB := ofState.final()
mo += br.getBits(moB)
br.fill()
ml, mlB := mlState.final()
ml += br.getBits(mlB)
ll, llB := llState.final()
ll += br.getBits(llB)
br.fill()
if i != 0 {
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
for ctx.iteration >= 0 {
// s.next()
br.fill()
mo, moB := ofState.final()
mo += br.getBits(moB)
br.fill()
ml, mlB := mlState.final()
ml += br.getBits(mlB)
ll, llB := llState.final()
ll += br.getBits(llB)
br.fill()
if ctx.iteration != 0 {
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}
mo = s.adjustOffset(mo, ll, moB)
if ml > maxMatchLen {
return errorMatchLenTooBig
}
if mo == 0 && ml > 0 {
return errorMatchLenOfsMismatch
}
ctx.iteration -= 1
}
mo = s.adjustOffset(mo, ll, moB)
return 0
*/
#define br_value R8 // br.value
#define br_bits_read R9 // br.bitsRead
Expand Down Expand Up @@ -288,7 +302,7 @@ br_fill_byte_by_byte_3:
br_fill_end_3:
// bitreader_fill end

// if i != 0 {
// if ctx.iteration != 0 {
// nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
// bits := br.get32BitsFast(nBits)
// lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
Expand Down Expand Up @@ -545,15 +559,16 @@ check_triple:
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
*/
XORQ AX, AX
TESTQ BX, BX
SETEQ DL
CMPQ CX, $0
SETHI AL
ANDQ DX, AX
TESTB AL, AL
TESTQ AX, AX
JNZ error_match_len_ofs_mismatch

ADDQ $24, seqs
ADDQ $24, seqs // sizeof(seqVals) == 3*8

DECQ decodeAsmContext_iteration(DI)
JNS main_loop
Expand Down
74 changes: 50 additions & 24 deletions zstd/seqdec_amd64.s.in
Expand Up @@ -185,6 +185,16 @@ br_fill_end{{.}}:
#endif
{{end}}

{{/*
Input:
AX - number of bits

Output:
BX - value

Clobbers:
AX, BX, CX
*/}}
{{define "get_bits"}}
#ifdef GOAMD64_v3
LEAQ (br_bits_read)(AX*1), CX
Expand Down Expand Up @@ -218,34 +228,48 @@ TEXT ·sequenceDecs_decode_amd64(SB), NOSPLIT, $8
/*
This procedure implements the following sequence:

// s.next()
br.fill()
mo, moB := ofState.final()
mo += br.getBits(moB)
for ctx.iteration >= 0 {
// s.next()
br.fill()
mo, moB := ofState.final()
mo += br.getBits(moB)

br.fill()
ml, mlB := mlState.final()
ml += br.getBits(mlB)

br.fill()
ml, mlB := mlState.final()
ml += br.getBits(mlB)
ll, llB := llState.final()
ll += br.getBits(llB)

ll, llB := llState.final()
ll += br.getBits(llB)
br.fill()
if ctx.iteration != 0 {
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]

br.fill()
if i != 0 {
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]

lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}

lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
mo = s.adjustOffset(mo, ll, moB)

if ml > maxMatchLen {
return errorMatchLenTooBig
}
if mo == 0 && ml > 0 {
return errorMatchLenOfsMismatch
}

ctx.iteration -= 1
}

mo = s.adjustOffset(mo, ll, moB)
return 0

*/
#define br_value R8 // br.value
#define br_bits_read R9 // br.bitsRead
Expand Down Expand Up @@ -316,7 +340,7 @@ main_loop:
{{template "bitreader_fill" .}}
{{end}}

// if i != 0 {
// if ctx.iteration != 0 {
// nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
// bits := br.get32BitsFast(nBits)
// lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
Expand Down Expand Up @@ -420,20 +444,22 @@ check_triple:
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
*/
XORQ AX, AX
TESTQ BX, BX
SETEQ DL
CMPQ CX, $0
SETHI AL
ANDQ DX, AX
TESTB AL, AL
TESTQ AX, AX
JNZ error_match_len_ofs_mismatch

ADDQ $24, seqs
ADDQ $24, seqs // sizeof(seqVals) == 3*8

DECQ decodeAsmContext_iteration(DI)
JNS main_loop

XORQ AX, AX

end:
MOVQ 0(SP), BP
MOVQ AX, ret+24(FP)
Expand Down

0 comments on commit 0dcdbd0

Please sign in to comment.