diff --git a/decode_arm.s b/decode_arm.s
index a5a1ce0..5b5e013 100644
--- a/decode_arm.s
+++ b/decode_arm.s
@@ -109,13 +109,13 @@ copyLiteralFinish:
 	MOVB.NE	tmp2, -1(dst)
 
 copyLiteralDone:
-	CMP	src, srcend
-	BEQ	end
-
 	// Initial part of match length.
 	// This frees up the token register for reuse as offset.
 	AND	$15, token, len
 
+	CMP	src, srcend
+	BEQ	end
+
 	// Read offset.
 	ADD.S	$2, src
 	BCS	shortSrc
@@ -188,6 +188,8 @@ copyMatchDone:
 	BNE	loop
 
 end:
+	CMP	$0, len
+	BNE	corrupt
 	SUB	dstorig, dst, tmp1
 	MOVW	tmp1, ret+24(FP)
 	RET
diff --git a/decode_arm64.s b/decode_arm64.s
index aa73a1c..abc306b 100644
--- a/decode_arm64.s
+++ b/decode_arm64.s
@@ -112,6 +112,9 @@ copyLiteralShortEnd:
 	MOVB.P	tmp4, 1(dst)
 
 copyLiteralDone:
+	// Initial part of match length.
+	AND	$15, token, len
+
 	CMP	src, srcend
 	BEQ	end
 
@@ -123,8 +126,7 @@ copyLiteralDone:
 	MOVHU	-2(src), offset
 	CBZ	offset, corrupt
 
-	// Read match length.
-	AND	$15, token, len
+	// Read rest of match length.
 	CMP	$15, len
 	BNE	readMatchlenDone
 
@@ -163,29 +165,30 @@ copyMatchTry8:
 	AND	$7, len, lenRem
 	SUB	$8, len
 copyMatchLoop8:
-	SUBS	$8, len
 	MOVD.P	8(match), tmp1
 	MOVD.P	tmp1, 8(dst)
+	SUBS	$8, len
 	BPL	copyMatchLoop8
-	ADD	lenRem, match
+	MOVD	(match)(len), tmp2 // match+len == match+lenRem-8.
 	ADD	lenRem, dst
-	MOVD	-8(match), tmp2
+	MOVD	$0, len
 	MOVD	tmp2, -8(dst)
 	B	copyMatchDone
 
 copyMatchLoop1:
-	// Finish with a byte-at-a-time copy.
-	SUB	$1, len
+	// Byte-at-a-time copy for small offsets.
 	MOVBU.P	1(match), tmp2
 	MOVB.P	tmp2, 1(dst)
-	CBNZ	len, copyMatchLoop1
+	SUBS	$1, len
+	BNE	copyMatchLoop1
 
 copyMatchDone:
 	CMP	src, srcend
 	BNE	loop
 
 end:
+	CBNZ	len, corrupt
 	SUB	dstorig, dst, tmp1
 	MOVD	tmp1, ret+48(FP)
 	RET
 
diff --git a/decode_asm.go b/decode_asm.go
index eb05ace..4f0bb37 100644
--- a/decode_asm.go
+++ b/decode_asm.go
@@ -1,3 +1,4 @@
+//go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
 // +build amd64 arm arm64
 // +build !appengine
 // +build gc