diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 12324ab7db1..eb0d208e054 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1286,12 +1286,6 @@
 ;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(decl move_wide_const_from_u64 (MoveWideConst) u64)
-(extern extractor move_wide_const_from_u64 move_wide_const_from_u64)
-
-(decl move_wide_const_from_negated_u64 (MoveWideConst) u64)
-(extern extractor move_wide_const_from_negated_u64 move_wide_const_from_negated_u64)
-
 (decl pure imm_logic_from_u64 (Type u64) ImmLogic)
 (extern constructor imm_logic_from_u64 imm_logic_from_u64)
 
@@ -1884,27 +1878,36 @@
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(decl imm (Type u64) Reg)
-
-;; 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
-(rule (imm (integral_ty _ty) (move_wide_const_from_u64 n))
-      (movz n (OperandSize.Size64)))
-
-;; 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
-(rule (imm (integral_ty _ty) (move_wide_const_from_negated_u64 n))
-      (movn n (OperandSize.Size64)))
-
-;; Weird logical-instruction immediate in ORI using zero register
-(rule (imm (integral_ty _ty) k)
-      (if-let n (imm_logic_from_u64 $I64 k))
-      (orr_imm $I64 (zero_reg) n))
-
-(decl load_constant64_full (u64) Reg)
+;; Type of extension performed by an immediate helper
+(type ImmExtend
+  (enum
+    (Sign)
+    (Zero)))
+
+;; Arguments:
+;;   * Immediate type
+;;   * Way to extend the immediate value to the full width of the destination
+;;     register
+;;   * Immediate value - only the bits that fit within the type are used and
+;;     extended, while the rest are ignored
+;;
+;; Note that, unlike the convention in the AArch64 backend, this helper leaves
+;; all bits in the destination register in a defined state, i.e. smaller types
+;; such as `I8` are either sign- or zero-extended.
+(decl imm (Type ImmExtend u64) Reg)
+
+;; Weird logical-instruction immediate in ORR using zero register; to simplify,
+;; we only match when we are zero-extending the value.
+(rule (imm (integral_ty ty) (ImmExtend.Zero) k)
+      (if-let n (imm_logic_from_u64 ty k))
+      (orr_imm ty (zero_reg) n))
+
+(decl load_constant64_full (Type ImmExtend u64) Reg)
 (extern constructor load_constant64_full load_constant64_full)
 
-;; Fallback for integral 64-bit constants that uses lots of `movk`
-(rule (imm (integral_ty _ty) n)
-      (load_constant64_full n))
+;; Fallback for integral 64-bit constants
+(rule (imm (integral_ty ty) extend n)
+      (load_constant64_full ty extend n))
 
 ;; Sign extension helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs
index 4505d56c4c4..47a30b40a3d 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs
@@ -618,6 +618,10 @@ impl MoveWideConst {
             })
         }
     }
+
+    pub fn zero() -> MoveWideConst {
+        MoveWideConst { bits: 0, shift: 0 }
+    }
 }
 
 /// Advanced SIMD modified immediate as used by MOVI/MVNI.
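As background for the two fast paths added further below in `load_constant64_full`, here is a minimal standalone model (illustrative Rust, not backend code) of the mov-wide semantics: MOVZ places a 16-bit immediate at a shift of 0, 16, 32 or 48 bits and zeroes the remaining bits, while MOVN additionally inverts all 64 bits. The all-zero encoding returned by the new `MoveWideConst::zero()` therefore materializes 0 under MOVZ and u64::MAX under MOVN.

    // Standalone model of the two mov-wide forms; `movz`/`movn` here are
    // illustrative helpers, not the backend's emitters.
    fn movz(imm16: u16, shift: u8) -> u64 {
        assert!(shift <= 3, "shift selects bit position 0, 16, 32 or 48");
        (imm16 as u64) << (16 * shift as u32)
    }

    fn movn(imm16: u16, shift: u8) -> u64 {
        !movz(imm16, shift) // MOVN inverts the full 64-bit MOVZ pattern
    }

    fn main() {
        // MoveWideConst { bits: 0, shift: 0 } covers both extremes:
        assert_eq!(movz(0, 0), 0);
        assert_eq!(movn(0, 0), u64::MAX);
    }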
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index b298a305091..391feb5ba05 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -7,20 +7,20 @@
 ;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
-      (imm ty n))
+      (imm ty (ImmExtend.Zero) n))
 
 ;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty (bconst $false)))
-      (imm ty 0))
+      (imm ty (ImmExtend.Zero) 0))
 
 (rule (lower (has_type ty (bconst $true)))
-      (imm ty 1))
+      (imm ty (ImmExtend.Zero) 1))
 
 ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty (null)))
-      (imm ty 0))
+      (imm ty (ImmExtend.Zero) 0))
 
 ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -402,7 +402,7 @@
 ;; move it into a register.
 (rule (put_nonzero_in_reg_zext64 (and (value_type ty)
                                       (iconst (nonzero_u64_from_imm64 n))))
-      (imm ty n))
+      (imm ty (ImmExtend.Zero) n))
 
 ;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -437,7 +437,7 @@
 ;; Special case for `sdiv` where no checks are needed due to division by a
 ;; constant meaning the checks are always passed.
 (rule (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y)))))
-      (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty y)))
+      (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))
 
 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
 (decl put_nonzero_in_reg_sext64 (Value) Reg)
 
@@ -448,7 +448,7 @@
 ;; not zero we can skip the zero check.
 (rule (put_nonzero_in_reg_sext64 (and (value_type ty)
                                       (iconst (nonzero_u64_from_imm64 n))))
-      (imm ty n))
+      (imm ty (ImmExtend.Sign) n))
 
 ;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -501,14 +501,14 @@
 ;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper
 ;; bits are all zero.
 (rule (lower (has_type $I128 (uextend x)))
-      (value_regs (put_in_reg_zext64 x) (imm $I64 0)))
+      (value_regs (put_in_reg_zext64 x) (imm $I64 (ImmExtend.Zero) 0)))
 
 ;; Like above where vector extraction automatically zero-extends extending to
 ;; i128 only requires generating a 0 constant for the upper bits.
 (rule (lower (has_type $I128 (uextend (extractlane vec @ (value_type in)
                                                    (u8_from_uimm8 lane)))))
-      (value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 0)))
+      (value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 (ImmExtend.Zero) 0)))
 
 ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -892,7 +892,7 @@
 (rule (lower (has_type $I128 (rotl x y)))
       (let ((val ValueRegs x)
             (amt Reg (value_regs_get y 0))
-            (neg_amt Reg (sub $I64 (imm $I64 128) amt))
+            (neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt))
             (lshift ValueRegs (lower_shl128 val amt))
             (rshift ValueRegs (lower_ushr128 val neg_amt)))
         (value_regs
@@ -976,7 +976,7 @@
 (rule (lower (has_type $I128 (rotr x y)))
       (let ((val ValueRegs x)
             (amt Reg (value_regs_get y 0))
-            (neg_amt Reg (sub $I64 (imm $I64 128) amt))
+            (neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt))
             (rshift ValueRegs (lower_ushr128 val amt))
             (lshift ValueRegs (lower_shl128 val neg_amt))
             (hi Reg (orr $I64 (value_regs_get rshift 1) (value_regs_get lshift 1)))
@@ -1031,7 +1031,7 @@
     (let ((hi_clz Reg (a64_clz $I64 (value_regs_get val 1)))
           (lo_clz Reg (a64_clz $I64 (value_regs_get val 0)))
           (tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6))))
-      (value_regs (madd $I64 lo_clz tmp hi_clz) (imm $I64 0))))
+      (value_regs (madd $I64 lo_clz tmp hi_clz) (imm $I64 (ImmExtend.Zero) 0))))
 
 ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1083,7 +1083,7 @@
           (maybe_lo Reg (with_flags_reg
                           (cmp64_imm hi_cls (u8_into_imm12 63))
                           (csel (Cond.Eq) lo_sign_bits (zero_reg)))))
-      (value_regs (add $I64 maybe_lo hi_cls) (imm $I64 0))))
+      (value_regs (add $I64 maybe_lo hi_cls) (imm $I64 (ImmExtend.Zero) 0))))
 
 (rule (lower (has_type ty (cls x))) (a64_cls ty x))
 
@@ -1137,7 +1137,7 @@
          (tmp Reg (mov_to_vec tmp_half (value_regs_get val 1) 1 (VectorSize.Size64x2)))
          (nbits Reg (vec_cnt tmp (VectorSize.Size8x16)))
          (added Reg (addv nbits (VectorSize.Size8x16))))
-     (value_regs (mov_from_vec added 0 (VectorSize.Size8x16)) (imm $I64 0))))
+     (value_regs (mov_from_vec added 0 (VectorSize.Size8x16)) (imm $I64 (ImmExtend.Zero) 0))))
 
 (rule (lower (has_type $I8X16 (popcnt x)))
       (vec_cnt x (VectorSize.Size8x16)))
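The switch to `(ImmExtend.Sign)` in the `sdiv` rules above is the crux of the fix: `put_in_reg_sext64` sign-extends the dividend to 64 bits, so a narrow constant divisor must be sign-extended as well before the 64-bit `sdiv`. A minimal sketch of the intended semantics (plain Rust mirroring the rule, not backend code):

    // Model of the narrow sdiv-by-constant lowering; the comments name the
    // ISLE helpers each step corresponds to.
    fn sdiv_i16_by_const(x: i16, k: i16) -> i16 {
        let x64 = x as i64; // put_in_reg_sext64
        let k64 = k as i64; // (imm ty (ImmExtend.Sign) y)
        (x64 / k64) as i16  // a64_sdiv $I64
    }

    fn main() {
        // Matches the %sdiv_i16_const run tests added further below.
        assert_eq!(sdiv_i16_by_const(-2, -2), 1); // 0xFFFE interpreted as -2

        // Zero-extending the divisor instead would divide by 0xFFFE = 65534:
        let zext_divisor = (-2i16 as u16) as i64;
        assert_eq!(-2i64 / zext_divisor, 0); // wrong result, not 1
    }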
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index fbe3a90dea6..55f26bf270a 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -75,20 +75,12 @@ where
         }
     }
 
-    fn move_wide_const_from_u64(&mut self, n: u64) -> Option<MoveWideConst> {
-        MoveWideConst::maybe_from_u64(n)
-    }
-
-    fn move_wide_const_from_negated_u64(&mut self, n: u64) -> Option<MoveWideConst> {
-        MoveWideConst::maybe_from_u64(!n)
-    }
-
     fn imm_logic_from_u64(&mut self, ty: Type, n: u64) -> Option<ImmLogic> {
-        let ty = if ty.bits() < 32 { I32 } else { ty };
         ImmLogic::maybe_from_u64(n, ty)
     }
 
     fn imm_logic_from_imm64(&mut self, ty: Type, n: Imm64) -> Option<ImmLogic> {
+        let ty = if ty.bits() < 32 { I32 } else { ty };
         self.imm_logic_from_u64(ty, n.bits() as u64)
     }
 
@@ -136,7 +128,45 @@ where
     ///
     /// The logic here is nontrivial enough that it's not really worth porting
     /// this over to ISLE.
-    fn load_constant64_full(&mut self, value: u64) -> Reg {
+    fn load_constant64_full(
+        &mut self,
+        ty: Type,
+        extend: &generated_code::ImmExtend,
+        value: u64,
+    ) -> Reg {
+        let bits = ty.bits();
+        let value = if bits < 64 {
+            if *extend == generated_code::ImmExtend::Sign {
+                let shift = 64 - bits;
+                let value = value as i64;
+
+                ((value << shift) >> shift) as u64
+            } else {
+                value & !(u64::MAX << bits)
+            }
+        } else {
+            value
+        };
+        let rd = self.temp_writable_reg(I64);
+
+        if value == 0 {
+            self.emit(&MInst::MovWide {
+                op: MoveWideOp::MovZ,
+                rd,
+                imm: MoveWideConst::zero(),
+                size: OperandSize::Size64,
+            });
+            return rd.to_reg();
+        } else if value == u64::MAX {
+            self.emit(&MInst::MovWide {
+                op: MoveWideOp::MovN,
+                rd,
+                imm: MoveWideConst::zero(),
+                size: OperandSize::Size64,
+            });
+            return rd.to_reg();
+        };
+
         // If the top 32 bits are zero, use 32-bit `mov` operations.
         let (num_half_words, size, negated) = if value >> 32 == 0 {
             (2, OperandSize::Size32, (!value << 32) >> 32)
@@ -152,8 +182,6 @@ where
         let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
         let mut first_mov_emitted = false;
 
-        let rd = self.temp_writable_reg(I64);
-
         for i in 0..num_half_words {
             let imm16 = (value >> (16 * i)) & 0xffff;
             if imm16 != ignored_halfword {
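The narrowing at the top of the new `load_constant64_full` can be exercised on its own. The following sketch (standalone Rust with a hypothetical `narrow_to_64` helper, not the backend function) mirrors the sign/zero cases above; note that a sign-extended -1 of any width becomes `u64::MAX`, which is exactly what the new MOVN fast path and `MoveWideConst::zero()` handle.

    // Mirrors the sign/zero narrowing added to load_constant64_full.
    fn narrow_to_64(value: u64, bits: u32, sign: bool) -> u64 {
        if bits >= 64 {
            value
        } else if sign {
            let shift = 64 - bits;
            (((value as i64) << shift) >> shift) as u64 // arithmetic shift
        } else {
            value & !(u64::MAX << bits) // keep only the low `bits` bits
        }
    }

    fn main() {
        // iconst.i16 -2: sign-extended for the sdiv path...
        assert_eq!(narrow_to_64(0xFFFE, 16, true), 0xFFFF_FFFF_FFFF_FFFE);
        // ...and zero-extended for the udiv path.
        assert_eq!(narrow_to_64(0xFFFE, 16, false), 0xFFFE);
        // Bits outside the type are ignored, as the ISLE doc comment states.
        assert_eq!(narrow_to_64(0xDEAD_0001, 16, false), 0x0001);
        // Sign-extended -1 becomes u64::MAX, hitting the MOVN fast path.
        assert_eq!(narrow_to_64(0xFF, 8, true), u64::MAX);
    }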
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
index 7c47f19798e..b50b482b256 100644
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -74,7 +74,7 @@ block0(v0: i64):
 }
 
 ; block0:
-; orr x3, xzr, #2
+; movz w3, #2
 ; sdiv x0, x0, x3
 ; ret
 
@@ -150,7 +150,7 @@ block0(v0: i32):
 
 ; block0:
 ; sxtw x3, w0
-; orr x5, xzr, #2
+; movz w5, #2
 ; sdiv x0, x3, x5
 ; ret
 
@@ -176,7 +176,7 @@ block0(v0: i32):
 
 ; block0:
 ; mov w3, w0
-; orr x5, xzr, #2
+; orr w5, wzr, #2
 ; udiv x0, x3, x5
 ; ret
 
@@ -460,7 +460,7 @@ block0(v0: i64):
 }
 
 ; block0:
-; orr x3, xzr, #2
+; movz w3, #2
 ; sdiv x5, x0, x3
 ; msub x0, x5, x3, x0
 ; ret
diff --git a/cranelift/filetests/filetests/runtests/arithmetic.clif b/cranelift/filetests/filetests/runtests/arithmetic.clif
index fdcdc760ba5..9fd5149b09c 100644
--- a/cranelift/filetests/filetests/runtests/arithmetic.clif
+++ b/cranelift/filetests/filetests/runtests/arithmetic.clif
@@ -347,6 +347,16 @@ block0(v0: i64,v1: i64):
 ; run: %sdiv_i64(0xC0FFEEEE_DECAFFFF, -8) == 0x7E002222_426A000
 ; run: %sdiv_i64(0x80000000_00000000, -2) == 0x40000000_00000000
 
+function %sdiv_i64_const(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -2
+    v2 = sdiv v0, v1
+    return v2
+}
+; run: %sdiv_i64_const(0) == 0
+; run: %sdiv_i64_const(-1) == 0
+; run: %sdiv_i64_const(0xFFFFFFFF_FFFFFFFE) == 1
+
 function %sdiv_i32(i32, i32) -> i32 {
 block0(v0: i32,v1: i32):
     v2 = sdiv v0, v1
@@ -363,6 +373,16 @@ block0(v0: i32,v1: i32):
 ; run: %sdiv_i32(0xC0FFEEEE, -8) == 0x7E00222
 ; run: %sdiv_i32(0x80000000, -2) == 0x40000000
 
+function %sdiv_i32_const(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -2
+    v2 = sdiv v0, v1
+    return v2
+}
+; run: %sdiv_i32_const(0) == 0
+; run: %sdiv_i32_const(-1) == 0
+; run: %sdiv_i32_const(0xFFFFFFFE) == 1
+
 function %sdiv_i16(i16, i16) -> i16 {
 block0(v0: i16,v1: i16):
     v2 = sdiv v0, v1
@@ -379,6 +399,16 @@ block0(v0: i16,v1: i16):
 ; run: %sdiv_i16(0xC0FF, -8) == 0x07E0
 ; run: %sdiv_i16(0x8000, -2) == 0x4000
 
+function %sdiv_i16_const(i16) -> i16 {
+block0(v0: i16):
+    v1 = iconst.i16 -2
+    v2 = sdiv v0, v1
+    return v2
+}
+; run: %sdiv_i16_const(0) == 0
+; run: %sdiv_i16_const(-1) == 0
+; run: %sdiv_i16_const(0xFFFE) == 1
+
 function %sdiv_i8(i8, i8) -> i8 {
 block0(v0: i8,v1: i8):
     v2 = sdiv v0, v1
@@ -395,6 +425,15 @@ block0(v0: i8,v1: i8):
 ; run: %sdiv_i8(0xC0, -8) == 0x08
 ; run: %sdiv_i8(0x80, -2) == 0x40
 
+function %sdiv_i8_const(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -2
+    v2 = sdiv v0, v1
+    return v2
+}
+; run: %sdiv_i8_const(0) == 0
+; run: %sdiv_i8_const(-1) == 0
+; run: %sdiv_i8_const(0xFE) == 1
 function %udiv_i64(i64, i64) -> i64 {
 block0(v0: i64,v1: i64):
     v2 = udiv v0, v1
@@ -413,6 +452,16 @@ block0(v0: i64,v1: i64):
 ; run: %udiv_i64(0x80000000_00000000, -1) == 0
 ; run: %udiv_i64(0x80000000_00000000, -2) == 0
 
+function %udiv_i64_const(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -2
+    v2 = udiv v0, v1
+    return v2
+}
+; run: %udiv_i64_const(0) == 0
+; run: %udiv_i64_const(-1) == 1
+; run: %udiv_i64_const(0xFFFFFFFF_FFFFFFFE) == 1
+
 function %udiv_i32(i32, i32) -> i32 {
 block0(v0: i32,v1: i32):
     v2 = udiv v0, v1
@@ -430,6 +479,16 @@ block0(v0: i32,v1: i32):
 ; run: %udiv_i32(0x80000000, -1) == 0
 ; run: %udiv_i32(0x80000000, -2) == 0
 
+function %udiv_i32_const(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -2
+    v2 = udiv v0, v1
+    return v2
+}
+; run: %udiv_i32_const(0) == 0
+; run: %udiv_i32_const(-1) == 1
+; run: %udiv_i32_const(0xFFFFFFFE) == 1
+
 function %udiv_i16(i16, i16) -> i16 {
 block0(v0: i16,v1: i16):
     v2 = udiv v0, v1
@@ -447,6 +506,16 @@ block0(v0: i16,v1: i16):
 ; run: %udiv_i16(0x8000, -1) == 0
 ; run: %udiv_i16(0x8000, -2) == 0
 
+function %udiv_i16_const(i16) -> i16 {
+block0(v0: i16):
+    v1 = iconst.i16 -2
+    v2 = udiv v0, v1
+    return v2
+}
+; run: %udiv_i16_const(0) == 0
+; run: %udiv_i16_const(-1) == 1
+; run: %udiv_i16_const(0xFFFE) == 1
+
 function %udiv_i8(i8, i8) -> i8 {
 block0(v0: i8,v1: i8):
     v2 = udiv v0, v1
@@ -463,3 +532,13 @@ block0(v0: i8,v1: i8):
 ; run: %udiv_i8(0xC0, -8) == 0
 ; run: %udiv_i8(0x80, -1) == 0
 ; run: %udiv_i8(0x80, -2) == 0
+
+function %udiv_i8_const(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -2
+    v2 = udiv v0, v1
+    return v2
+}
+; run: %udiv_i8_const(0) == 0
+; run: %udiv_i8_const(-1) == 1
+; run: %udiv_i8_const(0xFE) == 1
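The expected values in the new run tests follow from ordinary two's-complement narrowing: for `%udiv_i8_const`, the input -1 narrows to 0xFF and the constant -2 to 0xFE, so the zero-extended 64-bit division computes 0xFF / 0xFE == 1. A standalone check (illustrative Rust, hypothetical helper name):

    // Models %udiv_i8_const from the run tests above.
    fn udiv_i8_const(x: u8) -> u8 {
        let k = -2i8 as u8;             // iconst.i8 -2, zero-extended to 0xFE
        ((x as u64) / (k as u64)) as u8 // 64-bit udiv on zero-extended operands
    }

    fn main() {
        assert_eq!(udiv_i8_const(0), 0);
        assert_eq!(udiv_i8_const(-1i8 as u8), 1); // 0xFF / 0xFE
        assert_eq!(udiv_i8_const(0xFE), 1);
    }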