cranelift: Implement sqrt in interpreter (#4362)

This ignores SIMD for now.
bytecodealliance · Jul 1, 2022 · f2e6ff5 · f2e6ff5
1 parent 38ecd37
commit f2e6ff5
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 1 deletion.
diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs
@@ -763,6 +763,11 @@ impl Ieee32 {
     pub fn is_nan(&self) -> bool {
         f32::from_bits(self.0).is_nan()
     }
+
+    /// Returns the square root of `self`; NaN and negative inputs (other than `-0.0`) yield NaN.
+    pub fn sqrt(self) -> Self {
+        Self::with_float(f32::from_bits(self.0).sqrt())
+    }
 }
 
 impl PartialOrd for Ieee32 {
@@ -848,6 +853,11 @@ impl Ieee64 {
     pub fn is_nan(&self) -> bool {
         f64::from_bits(self.0).is_nan()
     }
+
+    /// Returns the square root of `self`; NaN and negative inputs (other than `-0.0`) yield NaN.
+    pub fn sqrt(self) -> Self {
+        Self::with_float(f64::from_bits(self.0).sqrt())
+    }
 }
 
 impl PartialOrd for Ieee64 {

diff --git a/cranelift/filetests/filetests/runtests/sqrt.clif b/cranelift/filetests/filetests/runtests/sqrt.clif
@@ -0,0 +1,97 @@
+test interpret
+test run
+target aarch64
+target x86_64
+target s390x
+
+function %sqrt_f32(f32) -> f32 {
+block0(v0: f32):
+    v1 = sqrt v0
+    return v1
+}
+; run: %sqrt_f32(0x9.0) == 0x3.0
+; run: %sqrt_f32(0x0.0) == 0x0.0
+; run: %sqrt_f32(-0x0.0) == -0x0.0
+; run: %sqrt_f32(+Inf) == +Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %sqrt_f32(0x1.000000p-23) == 0x1.6a09e6p-12
+; run: %sqrt_f32(0x1.fffffep127) == 0x1.fffffep63
+; run: %sqrt_f32(0x1.000000p-126) == 0x1.000000p-63
+
+; F32 Subnormals
+; run: %sqrt_f32(0x0.800000p-126) == 0x1.6a09e6p-64
+; run: %sqrt_f32(0x0.000002p-126) == 0x1.6a09e6p-75
+
+
+; The IEEE 754 standard makes few guarantees about the exact result of
+; NaN-producing operations, so we only check that the result is a NaN.
+function %sqrt_is_nan_f32(f32) -> i32 {
+block0(v0: f32):
+    v2 = sqrt v0
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %sqrt_is_nan_f32(-0x9.0) == 1
+; run: %sqrt_is_nan_f32(-Inf) == 1
+; run: %sqrt_is_nan_f32(+NaN) == 1
+; run: %sqrt_is_nan_f32(-NaN) == 1
+; run: %sqrt_is_nan_f32(+NaN:0x0) == 1
+; run: %sqrt_is_nan_f32(+NaN:0x1) == 1
+; run: %sqrt_is_nan_f32(+NaN:0x300001) == 1
+; run: %sqrt_is_nan_f32(-NaN:0x0) == 1
+; run: %sqrt_is_nan_f32(-NaN:0x1) == 1
+; run: %sqrt_is_nan_f32(-NaN:0x300001) == 1
+; run: %sqrt_is_nan_f32(+sNaN:0x1) == 1
+; run: %sqrt_is_nan_f32(-sNaN:0x1) == 1
+; run: %sqrt_is_nan_f32(+sNaN:0x200001) == 1
+; run: %sqrt_is_nan_f32(-sNaN:0x200001) == 1
+; run: %sqrt_is_nan_f32(-0x1.fffffep127) == 1
+
+
+
+function %sqrt_f64(f64) -> f64 {
+block0(v0: f64):
+    v1 = sqrt v0
+    return v1
+}
+; run: %sqrt_f64(0x9.0) == 0x3.0
+; run: %sqrt_f64(0x0.0) == 0x0.0
+; run: %sqrt_f64(-0x0.0) == -0x0.0
+; run: %sqrt_f64(+Inf) == +Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %sqrt_f64(0x1.0000000000000p-52) == 0x1.0000000000000p-26
+; run: %sqrt_f64(0x1.fffffffffffffp1023) == 0x1.fffffffffffffp511
+; run: %sqrt_f64(0x1.0000000000000p-1022) == 0x1.0000000000000p-511
+
+; F64 Subnormals
+; run: %sqrt_f64(0x0.8000000000000p-1022) == 0x1.6a09e667f3bcdp-512
+; run: %sqrt_f64(0x0.0000000000001p-1022) == 0x1.0000000000000p-537
+
+
+; The IEEE 754 standard makes few guarantees about the exact result of
+; NaN-producing operations, so we only check that the result is a NaN.
+function %sqrt_is_nan_f64(f64) -> i32 {
+block0(v0: f64):
+    v2 = sqrt v0
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %sqrt_is_nan_f64(-0x9.0) == 1
+; run: %sqrt_is_nan_f64(-Inf) == 1
+; run: %sqrt_is_nan_f64(+NaN) == 1
+; run: %sqrt_is_nan_f64(-NaN) == 1
+; run: %sqrt_is_nan_f64(+NaN:0x0) == 1
+; run: %sqrt_is_nan_f64(+NaN:0x1) == 1
+; run: %sqrt_is_nan_f64(+NaN:0x4000000000001) == 1
+; run: %sqrt_is_nan_f64(-NaN:0x0) == 1
+; run: %sqrt_is_nan_f64(-NaN:0x1) == 1
+; run: %sqrt_is_nan_f64(-NaN:0x4000000000001) == 1
+; run: %sqrt_is_nan_f64(+sNaN:0x1) == 1
+; run: %sqrt_is_nan_f64(-sNaN:0x1) == 1
+; run: %sqrt_is_nan_f64(+sNaN:0x4000000000001) == 1
+; run: %sqrt_is_nan_f64(-sNaN:0x4000000000001) == 1
+; run: %sqrt_is_nan_f64(-0x1.fffffffffffffp1023) == 1
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
@@ -678,7 +678,7 @@ where
         Opcode::Fsub => binary(Value::sub, arg(0)?, arg(1)?)?,
         Opcode::Fmul => binary(Value::mul, arg(0)?, arg(1)?)?,
         Opcode::Fdiv => binary(Value::div, arg(0)?, arg(1)?)?,
-        Opcode::Sqrt => unimplemented!("Sqrt"),
+        Opcode::Sqrt => assign(Value::sqrt(arg(0)?)?),
         Opcode::Fma => unimplemented!("Fma"),
         Opcode::Fneg => binary(Value::sub, Value::float(0, ctrl_ty)?, arg(0)?)?,
         Opcode::Fabs => unimplemented!("Fabs"),

diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
@@ -50,6 +50,7 @@ pub trait Value: Clone + From<DataValue> {
     fn mul(self, other: Self) -> ValueResult<Self>;
     fn div(self, other: Self) -> ValueResult<Self>;
     fn rem(self, other: Self) -> ValueResult<Self>;
+    fn sqrt(self) -> ValueResult<Self>;
 
     // Saturating arithmetic.
     fn add_sat(self, other: Self) -> ValueResult<Self>;
@@ -275,6 +276,8 @@ impl Value for DataValue {
                 (DataValue::I64(n), types::I32) => DataValue::I32(i32::try_from(n)?),
                 (DataValue::I64(n), types::I64) => DataValue::I64(n),
                 (DataValue::I64(n), types::I128) => DataValue::I128(n as i128),
+                (DataValue::F32(n), types::I32) => DataValue::I32(n.bits() as i32),
+                (DataValue::F64(n), types::I64) => DataValue::I64(n.bits() as i64),
                 (DataValue::B(b), t) if t.is_bool() => DataValue::B(b),
                 (DataValue::B(b), t) if t.is_int() => {
                     // Bools are represented in memory as all 1's
@@ -461,6 +464,10 @@ impl Value for DataValue {
         binary_match!(%(&self, &other); [I8, I16, I32, I64])
     }
 
+    fn sqrt(self) -> ValueResult<Self> { // Only the F32 and F64 variants are listed (no SIMD yet).
+        unary_match!(sqrt(&self); [F32, F64]; [Ieee32, Ieee64])
+    }
+
     fn add_sat(self, other: Self) -> ValueResult<Self> {
         binary_match!(saturating_add(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
     }