From 8801e2e957d65c696d92420e7d55945c12625e4d Mon Sep 17 00:00:00 2001
From: Liz Fong-Jones
Date: Fri, 2 Jun 2023 17:32:30 -0700
Subject: [PATCH] Restore copyMatchTry4

---
 internal/lz4block/decode_arm64.s | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/internal/lz4block/decode_arm64.s b/internal/lz4block/decode_arm64.s
index c43e8a8d..d2fe11b8 100644
--- a/internal/lz4block/decode_arm64.s
+++ b/internal/lz4block/decode_arm64.s
@@ -185,7 +185,7 @@ copyMatchTry8:
 	// A 16-at-a-time loop doesn't provide a further speedup.
 	CMP $8, len
 	CCMP HS, offset, $8, $0
-	BLO copyMatchLoop1
+	BLO copyMatchTry4 // len < 8 or offset < 8: try a single 4-byte copy next.
 
 	AND $7, len, lenRem
 	SUB $8, len
@@ -201,8 +201,19 @@ copyMatchLoop8:
 	MOVD tmp2, -8(dst)
 	B copyMatchDone
 
+copyMatchTry4:
+	// Copy one 4-byte word if both len and offset are at least four.
+	CMP $4, len
+	CCMP HS, offset, $4, $0 // If len >= 4, compare offset with 4; otherwise flags = 0, so BLO is taken.
+	BLO copyMatchLoop1
+
+	MOVWU.P 4(match), tmp2 // tmp2 = 4 bytes at match; match += 4.
+	MOVWU.P tmp2, 4(dst) // Store those 4 bytes at dst; dst += 4.
+	SUBS $4, len
+	BEQ copyMatchDone // Nothing left to copy; otherwise fall through to the byte loop.
+
 copyMatchLoop1:
-	// Byte-at-a-time copy for small offsets.
+	// Byte-at-a-time copy: len or offset below four, or the bytes left after the word copy above.
 	MOVBU.P 1(match), tmp2
 	MOVB.P tmp2, 1(dst)
 	SUBS $1, len