Skip to content

Commit

Permalink
Merge pull request #161 from greatroar/amd64-match-copy-48
Browse files Browse the repository at this point in the history
internal/lz4block: Copy literals of <=48 bytes through XMM registers in amd64 decoder
  • Loading branch information
pierrec committed Jan 31, 2022
2 parents 677f6a5 + 257c664 commit 6bd757c
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions internal/lz4block/decode_amd64.s
Expand Up @@ -157,24 +157,28 @@ copy_literal:
CMPQ BX, R8
JA err_short_buf

// What's a good cutoff for calling memmove?
CMPQ CX, $16
// Copy literals of <=48 bytes through the XMM registers.
CMPQ CX, $48
JGT memmove_lit

// if len(dst[di:]) < 16
// if len(dst[di:]) < 48
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
CMPQ AX, $48
JLT memmove_lit

// if len(src[si:]) < 16
MOVQ R9, AX
SUBQ SI, AX
CMPQ AX, $16
// if len(src[si:]) < 48
MOVQ R9, BX
SUBQ SI, BX
CMPQ BX, $48
JLT memmove_lit

MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)

ADDQ CX, SI
ADDQ CX, DI
Expand Down

0 comments on commit 6bd757c

Please sign in to comment.