From d89e2b3361b877447744f3e675529543ce978be6 Mon Sep 17 00:00:00 2001
From: Afonso Bordado <afonso360@users.noreply.github.com>
Date: Wed, 8 May 2024 16:18:10 +0100
Subject: [PATCH] riscv64: Add remaining Zfa Instructions (#8582)

* riscv64: Add `fround` instruction

* riscv64: Remove unused load_fp functions

* riscv64: Add support for `fli` instruction

* riscv64: Add negated `fli` rules
---
 cranelift/codegen/src/isa/riscv64/inst.isle   |  63 +-
 .../codegen/src/isa/riscv64/inst/args.rs      | 130 +++-
 .../codegen/src/isa/riscv64/inst/emit.rs      |   4 +
 .../src/isa/riscv64/inst/emit_tests.rs        |  20 +
 .../codegen/src/isa/riscv64/inst/encode.rs    |  21 +
 cranelift/codegen/src/isa/riscv64/inst/mod.rs |  48 +-
 .../codegen/src/isa/riscv64/lower/isle.rs     |  14 +
 .../filetests/filetests/isa/riscv64/zfa.clif  | 603 ++++++++++++++++++
 .../filetests/filetests/runtests/ceil.clif    |   1 +
 .../filetests/runtests/f32const.clif          |  61 ++
 .../filetests/runtests/f64const.clif          |  60 ++
 .../filetests/filetests/runtests/floor.clif   |   1 +
 .../filetests/filetests/runtests/nearest.clif |   1 +
 .../filetests/filetests/runtests/trunc.clif   |   1 +
 14 files changed, 975 insertions(+), 53 deletions(-)
 create mode 100644 cranelift/filetests/filetests/runtests/f32const.clif
 create mode 100644 cranelift/filetests/filetests/runtests/f64const.clif

diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle
index 6a6704b16d4..e38a3d110bd 100644
--- a/cranelift/codegen/src/isa/riscv64/inst.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst.isle
@@ -19,6 +19,11 @@
       (rd WritableReg)
       (imm Imm20))
 
+    (Fli
+      (ty Type)
+      (imm FliConstant)
+      (rd WritableReg))
+
     ;; An ALU operation with one register sources and a register destination.
     (FpuRR
       (alu_op FpuOPRR)
@@ -411,6 +416,8 @@
   (QNaN)
 ))
 
+(type FliConstant (primitive FliConstant))
+
 (type FpuOPRR (enum
   ;; RV32F Standard Extension
   (FsqrtS)
@@ -447,8 +454,10 @@
   (FcvtWuD)
   (FcvtDW)
   (FcvtDWU)
-  ;; bitmapip
 
+  ;; Zfa Extension
+  (FroundS)
+  (FroundD)
 ))
 
 (type LoadOP (enum
@@ -1549,6 +1558,20 @@
 (rule (rv_fmaxm $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxmS) $F32 (FRM.RUP) rs1 rs2))
 (rule (rv_fmaxm $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxmD) $F64 (FRM.RUP) rs1 rs2))
 
+;; Helper for emitting the Zfa `fround.s` / `fround.d` instruction, selected by the type.
+(decl rv_fround (Type FRM FReg) FReg)
+(rule (rv_fround $F32 frm rs) (fpu_rr (FpuOPRR.FroundS) $F32 frm rs))
+(rule (rv_fround $F64 frm rs) (fpu_rr (FpuOPRR.FroundD) $F64 frm rs))
+
+;; Helper for emitting `fli`: loads constant `imm` into a reg from `temp_writable_freg`.
+(decl rv_fli (Type FliConstant) FReg)
+(rule (rv_fli ty imm)
+      (let ((dst WritableFReg (temp_writable_freg))
+            (_ Unit (emit (MInst.Fli ty
+                                     imm
+                                     dst))))
+        dst))
+
 ;; `Zba` Extension Instructions
 
 ;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction.
@@ -1778,6 +1801,14 @@
 
 
 
+;; Partial helper: yields a FliConstant only if the u64 bit pattern is loadable by `fli`.
+(decl pure partial fli_constant_from_u64 (Type u64) FliConstant)
+(extern constructor fli_constant_from_u64 fli_constant_from_u64)
+
+;; Partial helper: like `fli_constant_from_u64`, but matches the sign-flipped bit pattern.
+(decl pure partial fli_constant_from_negated_u64 (Type u64) FliConstant)
+(extern constructor fli_constant_from_negated_u64 fli_constant_from_negated_u64)
+
 ;; Helper for generating a i64 from a pair of Imm20 and Imm12 constants
 (decl i64_generate_imm (Imm20 Imm12) i64)
 (extern extractor i64_generate_imm i64_generate_imm)
@@ -1795,14 +1826,30 @@
 ;; TODO: Load floats using `fld` instead of `ld`
 (decl imm (Type u64) Reg)
 
-;; Refs get loaded as integers.
-(rule 5 (imm $R32 c) (imm $I32 c))
-(rule 5 (imm $R64 c) (imm $I64 c))
+;; If Zfa is enabled, we can load certain constants with the `fli` instruction.
+(rule 7 (imm (ty_scalar_float ty) imm)
+  (if-let $true (has_zfa))
+  (if-let const (fli_constant_from_u64 ty imm))
+  (rv_fli ty const))
 
-;; Floats get loaded as integers and then moved into an F register.
+;; It is beneficial to load the negated constant with `fli` and then negate it
+;; in a register.
+;;
+;; For f64's this saves one instruction, and for f32's it avoids
+;; having to allocate an integer register, reducing integer register pressure.
+(rule 6 (imm (ty_scalar_float ty) imm)
+  (if-let $true (has_zfa))
+  (if-let const (fli_constant_from_negated_u64 ty imm))
+  (rv_fneg ty (rv_fli ty const)))
+
+;; Otherwise floats get loaded as integers and then moved into an F register.
 (rule 5 (imm $F32 c) (gen_bitcast (imm $I32 c) $I32 $F32))
 (rule 5 (imm $F64 c) (gen_bitcast (imm $I64 c) $I64 $F64))
 
+;; Refs get loaded as integers.
+(rule 5 (imm $R32 c) (imm $I32 c))
+(rule 5 (imm $R64 c) (imm $I64 c))
+
 ;; Try to match just an imm12
 (rule 4 (imm (ty_int ty) c)
   (if-let (i64_generate_imm (imm20_is_zero) imm12) (i64_sextend_u64 ty c))
@@ -2470,7 +2517,7 @@
 (rule (float_round_fcvt $F64 frm rs) (rv_fcvtdl frm (rv_fcvtld frm rs)))
 
 (decl gen_float_round (FRM FReg Type) FReg)
-(rule (gen_float_round frm rs ty)
+(rule 0 (gen_float_round frm rs ty)
   (let (;; if rs is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
         ;; in mantissa, the result is the same as src, check for these cases first.
         (max FReg (imm ty (float_int_max ty)))
@@ -2491,6 +2538,10 @@
     ;; Check if the value cannot be rounded exactly and return the source input if so
     (gen_select_freg (cmp_eqz exact) corrected_nan rounded)))
 
+;; With Zfa we can use the dedicated `fround` instruction.
+(rule 1 (gen_float_round frm rs ty)
+  (if-let $true (has_zfa))
+  (rv_fround ty frm rs))
 
 
 
diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs
index 561d18da843..d8e59f8b93a 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/args.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs
@@ -312,6 +312,116 @@ impl IntegerCompare {
     }
 }
 
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct FliConstant(u8);
+
+impl FliConstant {
+    pub(crate) fn new(value: u8) -> Self {
+        debug_assert!(value <= 31, "Invalid FliConstant: {}", value);
+        Self(value)
+    }
+
+    pub(crate) fn maybe_from_u64(ty: Type, imm: u64) -> Option<Self> {
+        // Widen the raw bits to an f64 so that both f32 and f64 inputs can be
+        // compared against the single table of constants below.
+        let value = match ty {
+            F32 => f32::from_bits(imm as u32) as f64,
+            F64 => f64::from_bits(imm),
+            _ => unimplemented!(),
+        };
+
+        Some(match (ty, value) {
+            (_, f) if f == -1.0 => Self::new(0),
+
+            // Entry 1 is the minimum positive value of the *requested* type. Once widened to
+            // f64, f32::MIN_POSITIVE and f64::MIN_POSITIVE differ, so check each independently.
+            (F32, f) if f == (f32::MIN_POSITIVE as f64) => Self::new(1),
+            (F64, f) if f == f64::MIN_POSITIVE => Self::new(1),
+
+            (_, f) if f == 2.0f64.powi(-16) => Self::new(2),
+            (_, f) if f == 2.0f64.powi(-15) => Self::new(3),
+            (_, f) if f == 2.0f64.powi(-8) => Self::new(4),
+            (_, f) if f == 2.0f64.powi(-7) => Self::new(5),
+            (_, f) if f == 0.0625 => Self::new(6),
+            (_, f) if f == 0.125 => Self::new(7),
+            (_, f) if f == 0.25 => Self::new(8),
+            (_, f) if f == 0.3125 => Self::new(9),
+            (_, f) if f == 0.375 => Self::new(10),
+            (_, f) if f == 0.4375 => Self::new(11),
+            (_, f) if f == 0.5 => Self::new(12),
+            (_, f) if f == 0.625 => Self::new(13),
+            (_, f) if f == 0.75 => Self::new(14),
+            (_, f) if f == 0.875 => Self::new(15),
+            (_, f) if f == 1.0 => Self::new(16),
+            (_, f) if f == 1.25 => Self::new(17),
+            (_, f) if f == 1.5 => Self::new(18),
+            (_, f) if f == 1.75 => Self::new(19),
+            (_, f) if f == 2.0 => Self::new(20),
+            (_, f) if f == 2.5 => Self::new(21),
+            (_, f) if f == 3.0 => Self::new(22),
+            (_, f) if f == 4.0 => Self::new(23),
+            (_, f) if f == 8.0 => Self::new(24),
+            (_, f) if f == 16.0 => Self::new(25),
+            (_, f) if f == 128.0 => Self::new(26),
+            (_, f) if f == 256.0 => Self::new(27),
+            (_, f) if f == 32768.0 => Self::new(28),
+            (_, f) if f == 65536.0 => Self::new(29),
+            (_, f) if f == f64::INFINITY => Self::new(30),
+
+            // NaNs are not guaranteed to preserve their sign / payload bits through the
+            // conversion above, so compare the original `imm` bits directly.
+            (F32, f) if f.is_nan() && imm == 0x7fc0_0000 => Self::new(31), // Canonical NaN
+            (F64, f) if f.is_nan() && imm == 0x7ff8_0000_0000_0000 => Self::new(31), // Canonical NaN
+            _ => return None,
+        })
+    }
+
+    pub(crate) fn format(self) -> &'static str {
+        // The preferred assembly syntax for entries 1, 30, and 31 is min, inf, and nan, respectively.
+        // For entries 0 through 29 (including entry 1), the assembler will accept decimal constants
+        // in C-like syntax. Note that entries 2 through 5 are printed here as `2^-N` instead.
+        match self.0 {
+            0 => "-1.0",
+            1 => "min",
+            2 => "2^-16",
+            3 => "2^-15",
+            4 => "2^-8",
+            5 => "2^-7",
+            6 => "0.0625",
+            7 => "0.125",
+            8 => "0.25",
+            9 => "0.3125",
+            10 => "0.375",
+            11 => "0.4375",
+            12 => "0.5",
+            13 => "0.625",
+            14 => "0.75",
+            15 => "0.875",
+            16 => "1.0",
+            17 => "1.25",
+            18 => "1.5",
+            19 => "1.75",
+            20 => "2.0",
+            21 => "2.5",
+            22 => "3.0",
+            23 => "4.0",
+            24 => "8.0",
+            25 => "16.0",
+            26 => "128.0",
+            27 => "256.0",
+            28 => "32768.0",
+            29 => "65536.0",
+            30 => "inf",
+            31 => "nan",
+            _ => panic!("Invalid FliConstant"),
+        }
+    }
+
+    pub(crate) fn bits(self) -> u8 {
+        self.0
+    }
+}
+
 impl FpuOPRRRR {
     pub(crate) fn op_name(self) -> &'static str {
         match self {
@@ -376,6 +486,8 @@ impl FpuOPRR {
             Self::FcvtWuD => "fcvt.wu.d",
             Self::FcvtDW => "fcvt.d.w",
             Self::FcvtDWU => "fcvt.d.wu",
+            Self::FroundS => "fround.s",
+            Self::FroundD => "fround.d",
         }
     }
 
@@ -392,14 +504,6 @@ impl FpuOPRR {
             _ => false,
         }
     }
-    // move from x register to float register.
-    pub(crate) fn move_x_to_f_op(ty: Type) -> Self {
-        match ty {
-            F32 => Self::FmvWX,
-            F64 => Self::FmvDX,
-            _ => unreachable!("ty:{:?}", ty),
-        }
-    }
 
     pub(crate) fn op_code(self) -> u32 {
         match self {
@@ -428,7 +532,9 @@ impl FpuOPRR {
             | FpuOPRR::FcvtWD
             | FpuOPRR::FcvtWuD
             | FpuOPRR::FcvtDW
-            | FpuOPRR::FcvtDWU => 0b1010011,
+            | FpuOPRR::FcvtDWU
+            | FpuOPRR::FroundS
+            | FpuOPRR::FroundD => 0b1010011,
         }
     }
 
@@ -460,6 +566,8 @@ impl FpuOPRR {
             FpuOPRR::FcvtDW => 0b00000,
             FpuOPRR::FcvtDWU => 0b00001,
             FpuOPRR::FsqrtD => 0b00000,
+            FpuOPRR::FroundS => 0b00100,
+            FpuOPRR::FroundD => 0b00100,
         }
     }
     pub(crate) fn funct7(self) -> u32 {
@@ -482,8 +590,8 @@ impl FpuOPRR {
             FpuOPRR::FcvtDL => 0b1101001,
             FpuOPRR::FcvtDLu => 0b1101001,
             FpuOPRR::FmvDX => 0b1111001,
-            FpuOPRR::FcvtSD => 0b0100000,
-            FpuOPRR::FcvtDS => 0b0100001,
+            FpuOPRR::FcvtSD | FpuOPRR::FroundS => 0b0100000,
+            FpuOPRR::FcvtDS | FpuOPRR::FroundD => 0b0100001,
             FpuOPRR::FclassD => 0b1110001,
             FpuOPRR::FcvtWD => 0b1100001,
             FpuOPRR::FcvtWuD => 0b1100001,
diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
index f7318af9999..f3a5ea4d317 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -149,6 +149,7 @@ impl Inst {
             | Inst::Nop4
             | Inst::BrTable { .. }
             | Inst::Auipc { .. }
+            | Inst::Fli { .. }
             | Inst::Lui { .. }
             | Inst::LoadInlineConst { .. }
             | Inst::AluRRR { .. }
@@ -875,6 +876,9 @@ impl Inst {
                 let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                 sink.put4(x);
             }
+            &Inst::Fli { rd, ty, imm } => {
+                sink.put4(encode_fli(ty, imm, rd));
+            }
             &Inst::LoadInlineConst { rd, ty, imm } => {
                 let data = &imm.to_le_bytes()[..ty.bytes() as usize];
 
diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
index b7c0df9cc15..43b31e2c746 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
@@ -2084,6 +2084,26 @@ fn test_riscv64_binemit() {
         0x22b59553,
     ));
 
+    insns.push(TestUnit::new(
+        Inst::Fli {
+            ty: F32,
+            rd: writable_fa0(),
+            imm: FliConstant::new(0),
+        },
+        "fli.s fa0,-1.0",
+        0xf0100553,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::Fli {
+            ty: F64,
+            rd: writable_fa0(),
+            imm: FliConstant::new(13),
+        },
+        "fli.d fa0,0.625",
+        0xf2168553,
+    ));
+
     let (flags, isa_flags) = make_test_flags();
     let emit_info = EmitInfo::new(flags, isa_flags);
 
diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs
index 4ad7d46e333..801d10f3256 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs
@@ -652,3 +652,24 @@ pub fn encode_zcbmem_load(op: ZcbMemOp, rd: WritableReg, base: Reg, imm: Uimm2)
 pub fn encode_zcbmem_store(op: ZcbMemOp, src: Reg, base: Reg, imm: Uimm2) -> u16 {
     encode_zcbmem_bits(op, src, base, imm)
 }
+
+pub fn encode_fli(ty: Type, imm: FliConstant, rd: WritableReg) -> u32 {
+    // FLI.{S,D} reuses the FMV.{W,D}.X encoding: the rs1 field carries the
+    // 5-bit index of the constant to load and the rs2 field is fixed to 1.
+    let op = match ty {
+        F32 => FpuOPRR::FmvWX,
+        F64 => FpuOPRR::FmvDX,
+        _ => unreachable!(),
+    };
+    let frm = 0; // FRM is hard coded to 0 in both instructions
+    let rs2 = 1; // rs2 set to 1 is what differentiates FLI from FMV
+
+    let mut bits = 0;
+    bits |= unsigned_field_width(op.op_code(), 7);
+    bits |= reg_to_gpr_num(rd.to_reg()) << 7;
+    bits |= unsigned_field_width(frm, 3) << 12;
+    bits |= unsigned_field_width(imm.bits() as u32, 5) << 15; // rs1 field: constant index
+    bits |= unsigned_field_width(rs2, 5) << 20; // rs2 field is 5 bits (bits 20..=24)
+    bits |= unsigned_field_width(op.funct7(), 7) << 25;
+    bits
+}
diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
index 33fd8c04c32..484552afc72 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -236,42 +236,6 @@ impl Inst {
             .expect("code range is too big.")
     }
 
-    /// Create instructions that load a 32-bit floating-point constant.
-    pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        const_data: u32,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        let mut insts = SmallVec::new();
-        let tmp = alloc_tmp(I64);
-        insts.extend(Self::load_constant_u32(tmp, const_data as u64));
-        insts.push(Inst::FpuRR {
-            frm: FRM::RNE,
-            alu_op: FpuOPRR::move_x_to_f_op(F32),
-            rd,
-            rs: tmp.to_reg(),
-        });
-        insts
-    }
-
-    /// Create instructions that load a 64-bit floating-point constant.
-    pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        const_data: u64,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        let mut insts = SmallInstVec::new();
-        let tmp = alloc_tmp(I64);
-        insts.extend(Self::load_constant_u64(tmp, const_data));
-        insts.push(Inst::FpuRR {
-            frm: FRM::RNE,
-            alu_op: FpuOPRR::move_x_to_f_op(F64),
-            rd,
-            rs: tmp.to_reg(),
-        });
-        insts
-    }
-
     /// Generic constructor for a load (zero-extending where appropriate).
     pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
         if ty.is_vector() {
@@ -346,6 +310,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
         }
         Inst::Auipc { rd, .. } => collector.reg_def(rd),
         Inst::Lui { rd, .. } => collector.reg_def(rd),
+        Inst::Fli { rd, .. } => collector.reg_def(rd),
         Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
         Inst::AluRRR { rd, rs1, rs2, .. } => {
             collector.reg_use(rs1);
@@ -1172,6 +1137,17 @@ impl Inst {
                     imm.as_i32()
                 )
             }
+            &Inst::Fli { rd, ty, imm } => {
+                let rd_s = format_reg(rd.to_reg(), allocs);
+                let imm_s = imm.format();
+                let suffix = match ty {
+                    F32 => "s",
+                    F64 => "d",
+                    _ => unreachable!(),
+                };
+
+                format!("fli.{suffix} {rd_s},{imm_s}")
+            }
             &Inst::LoadInlineConst { rd, imm, .. } => {
                 let rd = format_reg(rd.to_reg(), allocs);
                 let mut buf = String::new();
diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
index 64f32bd4755..f55ecd958c2 100644
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -189,6 +189,20 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
         Imm12::from_i16(imm.as_i16() & (x as i16))
     }
 
+    fn fli_constant_from_u64(&mut self, ty: Type, imm: u64) -> Option<FliConstant> {
+        FliConstant::maybe_from_u64(ty, imm) // `None` when no FLI table entry matches
+    }
+
+    fn fli_constant_from_negated_u64(&mut self, ty: Type, imm: u64) -> Option<FliConstant> {
+        let negated_imm = match ty {
+            F64 => imm ^ 0x8000000000000000, // flip bit 63, the f64 sign bit
+            F32 => imm ^ 0x80000000, // flip bit 31, the f32 sign bit
+            _ => unimplemented!(), // only F32 and F64 are handled here
+        };
+
+        FliConstant::maybe_from_u64(ty, negated_imm)
+    }
+
     fn i64_generate_imm(&mut self, imm: i64) -> Option<(Imm20, Imm12)> {
         MInst::generate_imm(imm as u64)
     }
diff --git a/cranelift/filetests/filetests/isa/riscv64/zfa.clif b/cranelift/filetests/filetests/isa/riscv64/zfa.clif
index 60193e6840d..4c3345bb5fa 100644
--- a/cranelift/filetests/filetests/isa/riscv64/zfa.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/zfa.clif
@@ -68,3 +68,606 @@ block0(v0: f64, v1: f64):
 ;   .byte 0x53, 0x35, 0xb5, 0x2a
 ;   ret
 
+
+
+function %ceil_f32(f32) -> f32 {
+block0(v0: f32):
+  v1 = ceil v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.s fa0,fa0,rup
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x35, 0x45, 0x40
+;   ret
+
+
+function %ceil_f64(f64) -> f64 {
+block0(v0: f64):
+  v1 = ceil v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.d fa0,fa0,rup
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x35, 0x45, 0x42
+;   ret
+
+
+function %floor_f32(f32) -> f32 {
+block0(v0: f32):
+  v1 = floor v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.s fa0,fa0,rdn
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x25, 0x45, 0x40
+;   ret
+
+
+function %floor_f64(f64) -> f64 {
+block0(v0: f64):
+  v1 = floor v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.d fa0,fa0,rdn
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x25, 0x45, 0x42
+;   ret
+
+
+function %trunc_f32(f32) -> f32 {
+block0(v0: f32):
+  v1 = trunc v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.s fa0,fa0,rtz
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x15, 0x45, 0x40
+;   ret
+
+
+function %trunc_f64(f64) -> f64 {
+block0(v0: f64):
+  v1 = trunc v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.d fa0,fa0,rtz
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x15, 0x45, 0x42
+;   ret
+
+
+function %nearest_f32(f32) -> f32 {
+block0(v0: f32):
+  v1 = nearest v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.s fa0,fa0,rne
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x05, 0x45, 0x40
+;   ret
+
+
+function %nearest_f64(f64) -> f64 {
+block0(v0: f64):
+  v1 = nearest v0
+  return v1
+}
+
+; VCode:
+; block0:
+;   fround.d fa0,fa0,rne
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0x53, 0x05, 0x45, 0x42
+;   ret
+
+
+function %fli_s() -> f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32 {
+block0:
+    v0 = f32const -0x1.p0
+    v1 = f32const 0x1.p-126
+    v2 = f32const 0x1.p-16
+    v3 = f32const 0x1.p-15
+    v4 = f32const 0x1.p-8
+    v5 = f32const 0x1.p-7
+    v6 = f32const 0x1.p-4
+    v7 = f32const 0x1.p-3
+    v8 = f32const 0x1.p-2
+    v9 = f32const 0x1.4p-2
+    v10 = f32const 0x1.8p-2
+    v11 = f32const 0x1.cp-2
+    v12 = f32const 0x1.p-1
+    v13 = f32const 0x1.4p-1
+    v14 = f32const 0x1.8p-1
+    v15 = f32const 0x1.cp-1
+    v16 = f32const 0x1.p0
+    v17 = f32const 0x1.4p0
+    v18 = f32const 0x1.8p0
+    v19 = f32const 0x1.cp0
+    v20 = f32const 0x1.p1
+    v21 = f32const 0x1.4p1
+    v22 = f32const 0x1.8p1
+    v23 = f32const 0x1.p2
+    v24 = f32const 0x1.p3
+    v25 = f32const 0x1.p4
+    v26 = f32const 0x1.p7
+    v27 = f32const 0x1.p8
+    v28 = f32const 0x1.p15
+    v29 = f32const 0x1.p16
+    v30 = f32const +Inf
+    v31 = f32const +NaN
+
+    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
+}
+
+; VCode:
+;   addi sp,sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+;   addi sp,sp,-96
+;   fsd fs0,88(sp)
+;   fsd fs2,80(sp)
+;   fsd fs3,72(sp)
+;   fsd fs4,64(sp)
+;   fsd fs5,56(sp)
+;   fsd fs6,48(sp)
+;   fsd fs7,40(sp)
+;   fsd fs8,32(sp)
+;   fsd fs9,24(sp)
+;   fsd fs10,16(sp)
+;   fsd fs11,8(sp)
+; block0:
+;   fli.s fa0,-1.0
+;   fli.s fa1,min
+;   fli.s fa3,2^-16
+;   fli.s fa4,2^-15
+;   fli.s fa5,2^-8
+;   fli.s fa2,2^-7
+;   fli.s ft7,0.0625
+;   fli.s fa6,0.125
+;   fli.s fa7,0.25
+;   fli.s ft8,0.3125
+;   fli.s ft9,0.375
+;   fli.s ft10,0.4375
+;   fli.s ft11,0.5
+;   fli.s fs0,0.625
+;   fli.s fs1,0.75
+;   fli.s fs2,0.875
+;   fli.s fs3,1.0
+;   fli.s fs4,1.25
+;   fli.s fs5,1.5
+;   fli.s fs6,1.75
+;   fli.s fs7,2.0
+;   fli.s fs8,2.5
+;   fli.s fs9,3.0
+;   fli.s fs10,4.0
+;   fli.s fs11,8.0
+;   fli.s ft0,16.0
+;   fli.s ft1,128.0
+;   fli.s ft2,256.0
+;   fli.s ft3,32768.0
+;   fli.s ft4,65536.0
+;   fli.s ft5,inf
+;   fli.s ft6,nan
+;   fsw fa3,0(a0)
+;   fsw fa4,8(a0)
+;   fsw fa5,16(a0)
+;   fsw fa2,24(a0)
+;   fsw ft7,32(a0)
+;   fsw fa6,40(a0)
+;   fsw fa7,48(a0)
+;   fsw ft8,56(a0)
+;   fsw ft9,64(a0)
+;   fsw ft10,72(a0)
+;   fsw ft11,80(a0)
+;   fsw fs0,88(a0)
+;   fsw fs1,96(a0)
+;   fsw fs2,104(a0)
+;   fsw fs3,112(a0)
+;   fsw fs4,120(a0)
+;   fsw fs5,128(a0)
+;   fsw fs6,136(a0)
+;   fsw fs7,144(a0)
+;   fsw fs8,152(a0)
+;   fsw fs9,160(a0)
+;   fsw fs10,168(a0)
+;   fsw fs11,176(a0)
+;   fsw ft0,184(a0)
+;   fsw ft1,192(a0)
+;   fsw ft2,200(a0)
+;   fsw ft3,208(a0)
+;   fsw ft4,216(a0)
+;   fsw ft5,224(a0)
+;   fsw ft6,232(a0)
+;   fld fs0,88(sp)
+;   fld fs2,80(sp)
+;   fld fs3,72(sp)
+;   fld fs4,64(sp)
+;   fld fs5,56(sp)
+;   fld fs6,48(sp)
+;   fld fs7,40(sp)
+;   fld fs8,32(sp)
+;   fld fs9,24(sp)
+;   fld fs10,16(sp)
+;   fld fs11,8(sp)
+;   addi sp,sp,96
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   addi sp,sp,16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   mv s0, sp
+;   addi sp, sp, -0x60
+;   fsd fs0, 0x58(sp)
+;   fsd fs2, 0x50(sp)
+;   fsd fs3, 0x48(sp)
+;   fsd fs4, 0x40(sp)
+;   fsd fs5, 0x38(sp)
+;   fsd fs6, 0x30(sp)
+;   fsd fs7, 0x28(sp)
+;   fsd fs8, 0x20(sp)
+;   fsd fs9, 0x18(sp)
+;   fsd fs10, 0x10(sp)
+;   fsd fs11, 8(sp)
+; block1: ; offset 0x40
+;   .byte 0x53, 0x05, 0x10, 0xf0
+;   .byte 0xd3, 0x85, 0x10, 0xf0
+;   .byte 0xd3, 0x06, 0x11, 0xf0
+;   .byte 0x53, 0x87, 0x11, 0xf0
+;   .byte 0xd3, 0x07, 0x12, 0xf0
+;   .byte 0x53, 0x86, 0x12, 0xf0
+;   .byte 0xd3, 0x03, 0x13, 0xf0
+;   .byte 0x53, 0x88, 0x13, 0xf0
+;   .byte 0xd3, 0x08, 0x14, 0xf0
+;   .byte 0x53, 0x8e, 0x14, 0xf0
+;   .byte 0xd3, 0x0e, 0x15, 0xf0
+;   .byte 0x53, 0x8f, 0x15, 0xf0
+;   .byte 0xd3, 0x0f, 0x16, 0xf0
+;   .byte 0x53, 0x84, 0x16, 0xf0
+;   .byte 0xd3, 0x04, 0x17, 0xf0
+;   .byte 0x53, 0x89, 0x17, 0xf0
+;   .byte 0xd3, 0x09, 0x18, 0xf0
+;   .byte 0x53, 0x8a, 0x18, 0xf0
+;   .byte 0xd3, 0x0a, 0x19, 0xf0
+;   .byte 0x53, 0x8b, 0x19, 0xf0
+;   .byte 0xd3, 0x0b, 0x1a, 0xf0
+;   .byte 0x53, 0x8c, 0x1a, 0xf0
+;   .byte 0xd3, 0x0c, 0x1b, 0xf0
+;   .byte 0x53, 0x8d, 0x1b, 0xf0
+;   .byte 0xd3, 0x0d, 0x1c, 0xf0
+;   .byte 0x53, 0x80, 0x1c, 0xf0
+;   .byte 0xd3, 0x00, 0x1d, 0xf0
+;   .byte 0x53, 0x81, 0x1d, 0xf0
+;   .byte 0xd3, 0x01, 0x1e, 0xf0
+;   .byte 0x53, 0x82, 0x1e, 0xf0
+;   .byte 0xd3, 0x02, 0x1f, 0xf0
+;   .byte 0x53, 0x83, 0x1f, 0xf0
+;   fsw fa3, 0(a0)
+;   fsw fa4, 8(a0)
+;   fsw fa5, 0x10(a0)
+;   fsw fa2, 0x18(a0)
+;   fsw ft7, 0x20(a0)
+;   fsw fa6, 0x28(a0)
+;   fsw fa7, 0x30(a0)
+;   fsw ft8, 0x38(a0)
+;   fsw ft9, 0x40(a0)
+;   fsw ft10, 0x48(a0)
+;   fsw ft11, 0x50(a0)
+;   fsw fs0, 0x58(a0)
+;   fsw fs1, 0x60(a0)
+;   fsw fs2, 0x68(a0)
+;   fsw fs3, 0x70(a0)
+;   fsw fs4, 0x78(a0)
+;   fsw fs5, 0x80(a0)
+;   fsw fs6, 0x88(a0)
+;   fsw fs7, 0x90(a0)
+;   fsw fs8, 0x98(a0)
+;   fsw fs9, 0xa0(a0)
+;   fsw fs10, 0xa8(a0)
+;   fsw fs11, 0xb0(a0)
+;   fsw ft0, 0xb8(a0)
+;   fsw ft1, 0xc0(a0)
+;   fsw ft2, 0xc8(a0)
+;   fsw ft3, 0xd0(a0)
+;   fsw ft4, 0xd8(a0)
+;   fsw ft5, 0xe0(a0)
+;   fsw ft6, 0xe8(a0)
+;   fld fs0, 0x58(sp)
+;   fld fs2, 0x50(sp)
+;   fld fs3, 0x48(sp)
+;   fld fs4, 0x40(sp)
+;   fld fs5, 0x38(sp)
+;   fld fs6, 0x30(sp)
+;   fld fs7, 0x28(sp)
+;   fld fs8, 0x20(sp)
+;   fld fs9, 0x18(sp)
+;   fld fs10, 0x10(sp)
+;   fld fs11, 8(sp)
+;   addi sp, sp, 0x60
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
+function %fli_d() -> f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64 {
+block0:
+    v0 = f64const -0x1.p0
+    v1 = f64const 0x1.p-1022
+    v2 = f64const 0x1.p-16
+    v3 = f64const 0x1.p-15
+    v4 = f64const 0x1.p-8
+    v5 = f64const 0x1.p-7
+    v6 = f64const 0x1.p-4
+    v7 = f64const 0x1.p-3
+    v8 = f64const 0x1.p-2
+    v9 = f64const 0x1.4p-2
+    v10 = f64const 0x1.8p-2
+    v11 = f64const 0x1.cp-2
+    v12 = f64const 0x1.p-1
+    v13 = f64const 0x1.4p-1
+    v14 = f64const 0x1.8p-1
+    v15 = f64const 0x1.cp-1
+    v16 = f64const 0x1.p0
+    v17 = f64const 0x1.4p0
+    v18 = f64const 0x1.8p0
+    v19 = f64const 0x1.cp0
+    v20 = f64const 0x1.p1
+    v21 = f64const 0x1.4p1
+    v22 = f64const 0x1.8p1
+    v23 = f64const 0x1.p2
+    v24 = f64const 0x1.p3
+    v25 = f64const 0x1.p4
+    v26 = f64const 0x1.p7
+    v27 = f64const 0x1.p8
+    v28 = f64const 0x1.p15
+    v29 = f64const 0x1.p16
+    v30 = f64const +Inf
+    v31 = f64const +NaN
+    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
+}
+
+; VCode:
+;   addi sp,sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+;   addi sp,sp,-96
+;   fsd fs0,88(sp)
+;   fsd fs2,80(sp)
+;   fsd fs3,72(sp)
+;   fsd fs4,64(sp)
+;   fsd fs5,56(sp)
+;   fsd fs6,48(sp)
+;   fsd fs7,40(sp)
+;   fsd fs8,32(sp)
+;   fsd fs9,24(sp)
+;   fsd fs10,16(sp)
+;   fsd fs11,8(sp)
+; block0:
+;   fli.d fa0,-1.0
+;   fli.d fa1,min
+;   fli.d fa3,2^-16
+;   fli.d fa4,2^-15
+;   fli.d fa5,2^-8
+;   fli.d fa2,2^-7
+;   fli.d ft7,0.0625
+;   fli.d fa6,0.125
+;   fli.d fa7,0.25
+;   fli.d ft8,0.3125
+;   fli.d ft9,0.375
+;   fli.d ft10,0.4375
+;   fli.d ft11,0.5
+;   fli.d fs0,0.625
+;   fli.d fs1,0.75
+;   fli.d fs2,0.875
+;   fli.d fs3,1.0
+;   fli.d fs4,1.25
+;   fli.d fs5,1.5
+;   fli.d fs6,1.75
+;   fli.d fs7,2.0
+;   fli.d fs8,2.5
+;   fli.d fs9,3.0
+;   fli.d fs10,4.0
+;   fli.d fs11,8.0
+;   fli.d ft0,16.0
+;   fli.d ft1,128.0
+;   fli.d ft2,256.0
+;   fli.d ft3,32768.0
+;   fli.d ft4,65536.0
+;   fli.d ft5,inf
+;   fli.d ft6,nan
+;   fsd fa3,0(a0)
+;   fsd fa4,8(a0)
+;   fsd fa5,16(a0)
+;   fsd fa2,24(a0)
+;   fsd ft7,32(a0)
+;   fsd fa6,40(a0)
+;   fsd fa7,48(a0)
+;   fsd ft8,56(a0)
+;   fsd ft9,64(a0)
+;   fsd ft10,72(a0)
+;   fsd ft11,80(a0)
+;   fsd fs0,88(a0)
+;   fsd fs1,96(a0)
+;   fsd fs2,104(a0)
+;   fsd fs3,112(a0)
+;   fsd fs4,120(a0)
+;   fsd fs5,128(a0)
+;   fsd fs6,136(a0)
+;   fsd fs7,144(a0)
+;   fsd fs8,152(a0)
+;   fsd fs9,160(a0)
+;   fsd fs10,168(a0)
+;   fsd fs11,176(a0)
+;   fsd ft0,184(a0)
+;   fsd ft1,192(a0)
+;   fsd ft2,200(a0)
+;   fsd ft3,208(a0)
+;   fsd ft4,216(a0)
+;   fsd ft5,224(a0)
+;   fsd ft6,232(a0)
+;   fld fs0,88(sp)
+;   fld fs2,80(sp)
+;   fld fs3,72(sp)
+;   fld fs4,64(sp)
+;   fld fs5,56(sp)
+;   fld fs6,48(sp)
+;   fld fs7,40(sp)
+;   fld fs8,32(sp)
+;   fld fs9,24(sp)
+;   fld fs10,16(sp)
+;   fld fs11,8(sp)
+;   addi sp,sp,96
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   addi sp,sp,16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   mv s0, sp
+;   addi sp, sp, -0x60
+;   fsd fs0, 0x58(sp)
+;   fsd fs2, 0x50(sp)
+;   fsd fs3, 0x48(sp)
+;   fsd fs4, 0x40(sp)
+;   fsd fs5, 0x38(sp)
+;   fsd fs6, 0x30(sp)
+;   fsd fs7, 0x28(sp)
+;   fsd fs8, 0x20(sp)
+;   fsd fs9, 0x18(sp)
+;   fsd fs10, 0x10(sp)
+;   fsd fs11, 8(sp)
+; block1: ; offset 0x40
+;   .byte 0x53, 0x05, 0x10, 0xf2
+;   .byte 0xd3, 0x85, 0x10, 0xf2
+;   .byte 0xd3, 0x06, 0x11, 0xf2
+;   .byte 0x53, 0x87, 0x11, 0xf2
+;   .byte 0xd3, 0x07, 0x12, 0xf2
+;   .byte 0x53, 0x86, 0x12, 0xf2
+;   .byte 0xd3, 0x03, 0x13, 0xf2
+;   .byte 0x53, 0x88, 0x13, 0xf2
+;   .byte 0xd3, 0x08, 0x14, 0xf2
+;   .byte 0x53, 0x8e, 0x14, 0xf2
+;   .byte 0xd3, 0x0e, 0x15, 0xf2
+;   .byte 0x53, 0x8f, 0x15, 0xf2
+;   .byte 0xd3, 0x0f, 0x16, 0xf2
+;   .byte 0x53, 0x84, 0x16, 0xf2
+;   .byte 0xd3, 0x04, 0x17, 0xf2
+;   .byte 0x53, 0x89, 0x17, 0xf2
+;   .byte 0xd3, 0x09, 0x18, 0xf2
+;   .byte 0x53, 0x8a, 0x18, 0xf2
+;   .byte 0xd3, 0x0a, 0x19, 0xf2
+;   .byte 0x53, 0x8b, 0x19, 0xf2
+;   .byte 0xd3, 0x0b, 0x1a, 0xf2
+;   .byte 0x53, 0x8c, 0x1a, 0xf2
+;   .byte 0xd3, 0x0c, 0x1b, 0xf2
+;   .byte 0x53, 0x8d, 0x1b, 0xf2
+;   .byte 0xd3, 0x0d, 0x1c, 0xf2
+;   .byte 0x53, 0x80, 0x1c, 0xf2
+;   .byte 0xd3, 0x00, 0x1d, 0xf2
+;   .byte 0x53, 0x81, 0x1d, 0xf2
+;   .byte 0xd3, 0x01, 0x1e, 0xf2
+;   .byte 0x53, 0x82, 0x1e, 0xf2
+;   .byte 0xd3, 0x02, 0x1f, 0xf2
+;   .byte 0x53, 0x83, 0x1f, 0xf2
+;   fsd fa3, 0(a0)
+;   fsd fa4, 8(a0)
+;   fsd fa5, 0x10(a0)
+;   fsd fa2, 0x18(a0)
+;   fsd ft7, 0x20(a0)
+;   fsd fa6, 0x28(a0)
+;   fsd fa7, 0x30(a0)
+;   fsd ft8, 0x38(a0)
+;   fsd ft9, 0x40(a0)
+;   fsd ft10, 0x48(a0)
+;   fsd ft11, 0x50(a0)
+;   fsd fs0, 0x58(a0)
+;   fsd fs1, 0x60(a0)
+;   fsd fs2, 0x68(a0)
+;   fsd fs3, 0x70(a0)
+;   fsd fs4, 0x78(a0)
+;   fsd fs5, 0x80(a0)
+;   fsd fs6, 0x88(a0)
+;   fsd fs7, 0x90(a0)
+;   fsd fs8, 0x98(a0)
+;   fsd fs9, 0xa0(a0)
+;   fsd fs10, 0xa8(a0)
+;   fsd fs11, 0xb0(a0)
+;   fsd ft0, 0xb8(a0)
+;   fsd ft1, 0xc0(a0)
+;   fsd ft2, 0xc8(a0)
+;   fsd ft3, 0xd0(a0)
+;   fsd ft4, 0xd8(a0)
+;   fsd ft5, 0xe0(a0)
+;   fsd ft6, 0xe8(a0)
+;   fld fs0, 0x58(sp)
+;   fld fs2, 0x50(sp)
+;   fld fs3, 0x48(sp)
+;   fld fs4, 0x40(sp)
+;   fld fs5, 0x38(sp)
+;   fld fs6, 0x30(sp)
+;   fld fs7, 0x28(sp)
+;   fld fs8, 0x20(sp)
+;   fld fs9, 0x18(sp)
+;   fld fs10, 0x10(sp)
+;   fld fs11, 8(sp)
+;   addi sp, sp, 0x60
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/runtests/ceil.clif b/cranelift/filetests/filetests/runtests/ceil.clif
index 408f35bd174..9ac7f24a32a 100644
--- a/cranelift/filetests/filetests/runtests/ceil.clif
+++ b/cranelift/filetests/filetests/runtests/ceil.clif
@@ -7,6 +7,7 @@ target x86_64 sse42 has_avx
 target aarch64
 target s390x
 target riscv64
+target riscv64 has_zfa
 target riscv64 has_c has_zcb
 
 function %ceil_f32(f32) -> f32 {
diff --git a/cranelift/filetests/filetests/runtests/f32const.clif b/cranelift/filetests/filetests/runtests/f32const.clif
new file mode 100644
index 00000000000..208220e3327
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/f32const.clif
@@ -0,0 +1,61 @@
+test interpret
+test run
+target x86_64
+target aarch64
+target s390x
+target riscv64
+target riscv64 has_zfa
+target riscv64 has_c has_zcb
+
+
+;; These values are special for RISC-V since the Zfa extension has a
+;; dedicated instruction (`fli`) to generate them.
+
+function %special_f32_values() -> f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32 {
+block0:
+    v0 = f32const -0x1.p0
+    v1 = f32const 0x1.p-126
+    v2 = f32const 0x1.p-16
+    v3 = f32const 0x1.p-15
+    v4 = f32const 0x1.p-8
+    v5 = f32const 0x1.p-7
+    v6 = f32const 0x1.p-4
+    v7 = f32const 0x1.p-3
+    v8 = f32const 0x1.p-2
+    v9 = f32const 0x1.4p-2
+    v10 = f32const 0x1.8p-2
+    v11 = f32const 0x1.cp-2
+    v12 = f32const 0x1.p-1
+    v13 = f32const 0x1.4p-1
+    v14 = f32const 0x1.8p-1
+    v15 = f32const 0x1.cp-1
+    v16 = f32const 0x1.p0
+    v17 = f32const 0x1.4p0
+    v18 = f32const 0x1.8p0
+    v19 = f32const 0x1.cp0
+    v20 = f32const 0x1.p1
+    v21 = f32const 0x1.4p1
+    v22 = f32const 0x1.8p1
+    v23 = f32const 0x1.p2
+    v24 = f32const 0x1.p3
+    v25 = f32const 0x1.p4
+    v26 = f32const 0x1.p7
+    v27 = f32const 0x1.p8
+    v28 = f32const 0x1.p15
+    v29 = f32const 0x1.p16
+    v30 = f32const +Inf
+    v31 = f32const +NaN
+
+    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
+}
+
+; run: %special_f32_values() == [-0x1.p0, 0x1.p-126, 0x1.p-16, 0x1.p-15, 0x1.p-8, 0x1.p-7, 0x1.p-4, 0x1.p-3, 0x1.p-2, 0x1.4p-2, 0x1.8p-2, 0x1.cp-2, 0x1.p-1, 0x1.4p-1, 0x1.8p-1, 0x1.cp-1, 0x1.p0, 0x1.4p0, 0x1.8p0, 0x1.cp0, 0x1.p1, 0x1.4p1, 0x1.8p1, 0x1.p2, 0x1.p3, 0x1.p4, 0x1.p7, 0x1.p8, 0x1.p15, 0x1.p16, +Inf, +NaN]
+
+
+function %f32const_neg_nan() -> f32 {
+block0:
+    v0 = f32const -NaN
+    return v0
+}
+
+; run: %f32const_neg_nan() == -NaN
diff --git a/cranelift/filetests/filetests/runtests/f64const.clif b/cranelift/filetests/filetests/runtests/f64const.clif
new file mode 100644
index 00000000000..cb0d63a4443
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/f64const.clif
@@ -0,0 +1,60 @@
+test interpret
+test run
+target x86_64
+target aarch64
+target s390x
+target riscv64
+target riscv64 has_zfa
+target riscv64 has_c has_zcb
+
+
+;; These values are special for RISC-V since the Zfa extension has a
+;; dedicated instruction (`fli`) to generate them.
+
+function %special_f64_values() -> f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64, f64 {
+block0:
+    v0 = f64const -0x1.p0
+    v1 = f64const 0x1.p-1022
+    v2 = f64const 0x1.p-16
+    v3 = f64const 0x1.p-15
+    v4 = f64const 0x1.p-8
+    v5 = f64const 0x1.p-7
+    v6 = f64const 0x1.p-4
+    v7 = f64const 0x1.p-3
+    v8 = f64const 0x1.p-2
+    v9 = f64const 0x1.4p-2
+    v10 = f64const 0x1.8p-2
+    v11 = f64const 0x1.cp-2
+    v12 = f64const 0x1.p-1
+    v13 = f64const 0x1.4p-1
+    v14 = f64const 0x1.8p-1
+    v15 = f64const 0x1.cp-1
+    v16 = f64const 0x1.p0
+    v17 = f64const 0x1.4p0
+    v18 = f64const 0x1.8p0
+    v19 = f64const 0x1.cp0
+    v20 = f64const 0x1.p1
+    v21 = f64const 0x1.4p1
+    v22 = f64const 0x1.8p1
+    v23 = f64const 0x1.p2
+    v24 = f64const 0x1.p3
+    v25 = f64const 0x1.p4
+    v26 = f64const 0x1.p7
+    v27 = f64const 0x1.p8
+    v28 = f64const 0x1.p15
+    v29 = f64const 0x1.p16
+    v30 = f64const +Inf
+    v31 = f64const +NaN
+
+    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
+}
+
+; run: %special_f64_values() == [-0x1.p0, 0x1.p-1022, 0x1.p-16, 0x1.p-15, 0x1.p-8, 0x1.p-7, 0x1.p-4, 0x1.p-3, 0x1.p-2, 0x1.4p-2, 0x1.8p-2, 0x1.cp-2, 0x1.p-1, 0x1.4p-1, 0x1.8p-1, 0x1.cp-1, 0x1.p0, 0x1.4p0, 0x1.8p0, 0x1.cp0, 0x1.p1, 0x1.4p1, 0x1.8p1, 0x1.p2, 0x1.p3, 0x1.p4, 0x1.p7, 0x1.p8, 0x1.p15, 0x1.p16, +Inf, +NaN]
+
+function %f64const_neg_nan() -> f64 {
+block0:
+    v0 = f64const -NaN
+    return v0
+}
+
+; run: %f64const_neg_nan() == -NaN
diff --git a/cranelift/filetests/filetests/runtests/floor.clif b/cranelift/filetests/filetests/runtests/floor.clif
index 986061afbd0..3c49a0a9d21 100644
--- a/cranelift/filetests/filetests/runtests/floor.clif
+++ b/cranelift/filetests/filetests/runtests/floor.clif
@@ -7,6 +7,7 @@ target x86_64 sse42 has_avx
 target aarch64
 target s390x
 target riscv64
+target riscv64 has_zfa
 target riscv64 has_c has_zcb
 
 function %floor_f32(f32) -> f32 {
diff --git a/cranelift/filetests/filetests/runtests/nearest.clif b/cranelift/filetests/filetests/runtests/nearest.clif
index ee50e7b3864..90496d4070d 100644
--- a/cranelift/filetests/filetests/runtests/nearest.clif
+++ b/cranelift/filetests/filetests/runtests/nearest.clif
@@ -7,6 +7,7 @@ target x86_64 sse42 has_avx
 target aarch64
 target s390x
 target riscv64
+target riscv64 has_zfa
 target riscv64 has_c has_zcb
 
 function %nearest_f32(f32) -> f32 {
diff --git a/cranelift/filetests/filetests/runtests/trunc.clif b/cranelift/filetests/filetests/runtests/trunc.clif
index c858e6505d3..d2386d4258e 100644
--- a/cranelift/filetests/filetests/runtests/trunc.clif
+++ b/cranelift/filetests/filetests/runtests/trunc.clif
@@ -7,6 +7,7 @@ target x86_64 sse42 has_avx
 target aarch64
 target s390x
 target riscv64
+target riscv64 has_zfa
 target riscv64 has_c has_zcb
 
 function %trunc_f32(f32) -> f32 {