diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index b4856ff613..d8f418f729 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -826,12 +826,109 @@ pub struct UniformFloat<X> {
     scale: X,
 }
 
+trait Summable<T> {
+    fn compensated_sum(&self) -> T;
+}
+
+trait ScaleComputable<T> {
+    fn compute_scale(low: T, high: T) -> T;
+}
+
 macro_rules! uniform_float_impl {
     ($ty:ty, $uty:ident, $f_scalar:ident, $u_scalar:ident, $bits_to_discard:expr) => {
         impl SampleUniform for $ty {
             type Sampler = UniformFloat<$ty>;
         }
 
+        impl Summable<$ty> for &[$ty] {
+            fn compensated_sum(&self) -> $ty {
+                // Kahan compensated sum
+                let mut sum = <$ty>::splat(0.0);
+                let mut c = <$ty>::splat(0.0);
+                for val in *self {
+                    let y = val - c;
+                    let t = sum + y;
+                    c = (t - sum) - y;
+                    sum = t;
+                }
+                sum
+            }
+        }
+
+        impl ScaleComputable<$ty> for $ty {
+            fn compute_scale(low: $ty, high: $ty) -> $ty {
+                let eps = <$ty>::splat($f_scalar::EPSILON);
+
+                // `max_rand` is 1.0 - eps. This is actually the second largest
+                // float less than 1.0, because the spacing of the floats in the
+                // interval [0.5, 1.0) is `eps/2`.
+                let max_rand = <$ty>::splat(
+                    (::core::$u_scalar::MAX >> $bits_to_discard).into_float_with_exponent(0) - 1.0,
+                );
+
+                // `delta_high` is half the distance from `high` to the largest
+                // float that is less than `high`. If `high` is subnormal or 0,
+                // then `delta_high` will be 0. Why this is needed is explained
+                // below.
+                let delta_high = <$ty>::splat(0.5) * (high - high.utils_next_down());
+
+                // We want `scale * max_rand + low < high`. Let `high_1` be the
+                // (hypothetical) float that is the midpoint between `high` and
+                // the largest float less than `high`. The midpoint is used
+                // because any float calculation that would result in a value in
+                // `(high_1, high)` would be rounded to `high`. The ideal
+                // condition for the upper bound of `scale` is then
+                //     scale * max_rand + low = high_1
+                // or
+                //     scale = (high_1 - low)/max_rand
+                //
+                // Write `high_1 = high - delta_high`, `max_rand = 1 - eps`,
+                // and approximate `1/(1 - eps)` as `(1 + eps)`. Then we have
+                //
+                //     scale = (high - delta_high - low)*(1 + eps)
+                //           = high - low + eps*high - eps*low - delta_high
+                //
+                // (The extremely small term `-delta_high*eps` has been ignored.)
+                // The following uses Kahan's compensated summation to compute
+                // `scale` from those terms.
+                let terms: &[$ty] = &[high, -low, eps * high, -eps * low, -delta_high];
+                let mut scale = terms.compensated_sum();
+
+                // Empirical tests show that `scale` is generally within 1 or 2 ULPs
+                // of the "ideal" scale. Next we adjust `scale`, if necessary, to
+                // the ideal value.
+
+                // Check that `scale * max_rand + low` is less than `high`. If it is
+                // not, repeatedly adjust `scale` down by one ULP until the condition
+                // is satisfied. Generally this requires 0 or 1 adjustments to `scale`.
+                // (The original `too_big_mask` is saved so we can use it again below.)
+                let too_big_mask = (scale * max_rand + low).ge_mask(high);
+                loop {
+                    let mask = (scale * max_rand + low).ge_mask(high);
+                    if !mask.any() {
+                        break;
+                    }
+                    scale = scale.decrease_masked(mask);
+                }
+                // We have ensured that `scale * max_rand + low < high`. Now see if
+                // we can increase `scale` and still maintain that inequality. We
+                // only need to do this if `scale` was not initially too big.
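+                // (For lanes where `scale` was initially too big, the value one
+                // ULP above the adjusted `scale` is the value that just failed
+                // the check above, so those lanes are excluded from the search
+                // below via `not_too_big_mask`.)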
+                let not_too_big_mask = !too_big_mask;
+                let mut mask = not_too_big_mask;
+                if mask.any() {
+                    loop {
+                        let next_scale = scale.increase_masked(mask);
+                        mask = (next_scale * max_rand + low).lt_mask(high) & not_too_big_mask;
+                        if !mask.any() {
+                            break;
+                        }
+                        scale = scale.increase_masked(mask);
+                    }
+                }
+                scale
+            }
+        }
+
         impl UniformSampler for UniformFloat<$ty> {
             type X = $ty;
 
@@ -849,22 +946,12 @@ macro_rules! uniform_float_impl {
                 if !(low.all_lt(high)) {
                     return Err(Error::EmptyRange);
                 }
-                let max_rand = <$ty>::splat(
-                    (::core::$u_scalar::MAX >> $bits_to_discard).into_float_with_exponent(0) - 1.0,
-                );
 
-                let mut scale = high - low;
-                if !(scale.all_finite()) {
+                if !((high - low).all_finite()) {
                     return Err(Error::NonFinite);
                 }
 
-                loop {
-                    let mask = (scale * max_rand + low).ge_mask(high);
-                    if !mask.any() {
-                        break;
-                    }
-                    scale = scale.decrease_masked(mask);
-                }
+                let scale = <$ty>::compute_scale(low, high);
 
                 debug_assert!(<$ty>::splat(0.0).all_le(scale));
@@ -1430,6 +1517,71 @@ mod tests {
         }
     }
 
+    macro_rules! compute_scale_tests {
+        ($($fname:ident: $ty:ident,)*) => {
+            $(
+                #[test]
+                fn $fname() {
+                    // For each `(low, high)` pair in `v`, compute `scale` and
+                    // verify that
+                    //     scale * max_rand + low < high
+                    // and
+                    //     next_up(scale) * max_rand + low >= high
+                    let eps = $ty::EPSILON;
+                    let v = [
+                        (0.0 as $ty, 100.0 as $ty),
+                        (-0.125 as $ty, 0.0 as $ty),
+                        (0.0 as $ty, 0.125 as $ty),
+                        (-1.5 as $ty, -0.0 as $ty),
+                        (-0.0 as $ty, 1.5 as $ty),
+                        (-1.0 as $ty, -0.875 as $ty),
+                        (-1e35 as $ty, -1e25 as $ty),
+                        (1e-35 as $ty, 1e-25 as $ty),
+                        (-1e35 as $ty, 1e35 as $ty),
+                        // This one is mainly for f64, because these particular
+                        // numbers provided an example where the initial calculation
+                        // of `scale` is too small:
+                        (11.0 as $ty, 1001.000000005 as $ty),
+                        // Very small intervals: the difference `high - low` is
+                        // a small to moderate multiple of the type's EPSILON.
+                        (1.0 as $ty - (11.5 as $ty) * eps, 1.0 as $ty - (0.5 as $ty) * eps),
+                        (1.0 as $ty - (196389.0 as $ty) * eps / (2.0 as $ty), 1.0 as $ty),
+                        (1.0 as $ty, 1.0 as $ty + (1.0 as $ty) * eps),
+                        (1.0 as $ty, 1.0 as $ty + (2.0 as $ty) * eps),
+                        (1.0 as $ty - eps, 1.0 as $ty),
+                        (1.0 as $ty - eps, 1.0 as $ty + (2.0 as $ty) * eps),
+                        (-1.0 as $ty, -1.0 as $ty + (2.0 as $ty) * eps),
+                        (-2.0 as $ty, -2.0 as $ty + (17.0 as $ty) * eps),
+                        (-11.0 as $ty, -11.0 as $ty + (68.0 as $ty) * eps),
+                        // Ridiculously small intervals: `low` and `high` are subnormal.
+                        (-$ty::from_bits(3), $ty::from_bits(8)),
+                        (-$ty::from_bits(5), -$ty::from_bits(1)),
+                        // `high - low` is a significant fraction of the type's MAX.
+                        ((0.5 as $ty) * $ty::MIN, (0.25 as $ty) * $ty::MAX),
+                        ((0.25 as $ty) * $ty::MIN, (0.5 as $ty) * $ty::MAX),
+                        ((0.5 as $ty) * $ty::MIN, (0.4999995 as $ty) * $ty::MAX),
+                        ((0.75 as $ty) * $ty::MIN, 0.0 as $ty),
+                        (0.0 as $ty, (0.75 as $ty) * $ty::MAX),
+                    ];
+                    let max_rand = 1.0 as $ty - eps;
+                    for (low, high) in v {
+                        let scale = <$ty>::compute_scale(low, high);
+                        assert!(scale > 0.0 as $ty);
+                        assert!(scale * max_rand + low < high);
+                        // Equivalent to the unstable `scale.next_up()`:
+                        let next_scale = <$ty>::from_bits(scale.to_bits() + 1);
+                        assert!(next_scale * max_rand + low >= high);
+                    }
+                }
+            )*
+        }
+    }
+
+    compute_scale_tests! {
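+        // Each entry expands to a separate #[test] that runs the checks
+        // above for the named float type.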
+        test_compute_scale_f32: f32,
+        test_compute_scale_f64: f64,
+    }
+
     #[test]
     fn test_float_overflow() {
         assert_eq!(Uniform::try_from(::core::f64::MIN..::core::f64::MAX), Err(Error::NonFinite));
diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs
index bddb0a4a59..57e6525709 100644
--- a/src/distributions/utils.rs
+++ b/src/distributions/utils.rs
@@ -10,7 +10,6 @@
 #[cfg(feature = "simd_support")] use core::simd::*;
 
-
 pub(crate) trait WideningMultiply<RHS = Self> {
     type Output;
 
     fn wmul(self, x: RHS) -> Self::Output;
@@ -217,12 +216,22 @@ pub(crate) trait FloatSIMDUtils {
     fn finite_mask(self) -> Self::Mask;
     fn gt_mask(self, other: Self) -> Self::Mask;
     fn ge_mask(self, other: Self) -> Self::Mask;
+    fn lt_mask(self, other: Self) -> Self::Mask;
 
     // Decrease all lanes where the mask is `true` to the next lower value
     // representable by the floating-point type. At least one of the lanes
     // must be set.
     fn decrease_masked(self, mask: Self::Mask) -> Self;
 
+    // Increase all lanes where the mask is `true` to the next higher value
+    // representable by the floating-point type. At least one of the lanes
+    // must be set.
+    fn increase_masked(self, mask: Self::Mask) -> Self;
+
+    // Similar to the proposed `next_down()` method for Rust's f32 and f64,
+    // but this implementation does not handle `inf` or `nan`.
+    fn utils_next_down(self) -> Self;
+
     // Convert from int value. Conversion is done while retaining the numerical
     // value, not by retaining the binary representation.
     type UInt;
@@ -340,12 +349,37 @@ macro_rules! scalar_float_impl {
                 self >= other
             }
 
+            #[inline(always)]
+            fn lt_mask(self, other: Self) -> Self::Mask {
+                self < other
+            }
+
             #[inline(always)]
             fn decrease_masked(self, mask: Self::Mask) -> Self {
                 debug_assert!(mask, "At least one lane must be set");
                 <$ty>::from_bits(self.to_bits() - 1)
             }
 
+            #[inline(always)]
+            fn increase_masked(self, mask: Self::Mask) -> Self {
+                debug_assert!(mask, "At least one lane must be set");
+                <$ty>::from_bits(self.to_bits() + 1)
+            }
+
+            #[inline(always)]
+            fn utils_next_down(self) -> Self {
+                // This is not a drop-in replacement for the next_down() method
+                // proposed for Rust (https://github.com/rust-lang/rust/issues/91399).
+                // This function assumes that the input is not nan or inf.
+                if self > 0.0 {
+                    <$ty>::from_bits(self.to_bits() - 1)
+                } else if self < 0.0 {
+                    <$ty>::from_bits(self.to_bits() + 1)
+                } else {
+                    // The input is +0.0 or -0.0; the next value down is the
+                    // negative float closest to zero.
+                    <$ty>::from_bits((-0.0 as $ty).to_bits() + 1)
+                }
+            }
+
             #[inline]
             fn cast_from_int(i: Self::UInt) -> Self {
                 i as $ty
@@ -412,6 +446,11 @@ macro_rules! simd_impl {
                 self.simd_ge(other)
             }
 
+            #[inline(always)]
+            fn lt_mask(self, other: Self) -> Self::Mask {
+                self.simd_lt(other)
+            }
+
             #[inline(always)]
             fn decrease_masked(self, mask: Self::Mask) -> Self {
                 // Casting a mask into ints will produce all bits set for
@@ -424,6 +463,34 @@ macro_rules! simd_impl {
                 Self::from_bits(self.to_bits() + mask.to_int().cast())
             }
 
+            #[inline(always)]
+            fn increase_masked(self, mask: Self::Mask) -> Self {
+                debug_assert!(mask.any(), "At least one lane must be set");
+                let zero = Simd::<$uty, LANES>::splat(0);
+                let one = Simd::<$uty, LANES>::splat(1);
+                Self::from_bits(self.to_bits() + mask.select(one, zero))
+            }
+
+            #[inline(always)]
+            fn utils_next_down(self) -> Self {
+                // This is not a drop-in replacement for the next_down() method
+                // proposed for Rust (https://github.com/rust-lang/rust/issues/91399).
+                // This function assumes that no values are nan or inf.
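+                // `to_int()` converts a mask to -1 (true) / 0 (false) in each
+                // lane. Adding -1 (cast to the unsigned type, so the addition
+                // wraps) moves positive lanes one bit pattern down, and adding
+                // +1 moves negative lanes one bit pattern up, toward -inf. The
+                // zero lanes are patched separately below.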
+                let zero = Self::splat(0.0 as $fty);
+                let pos_mask = self.simd_gt(zero);
+                let neg_mask = self.simd_lt(zero);
+                let zero_mask = self.simd_eq(zero); // Could be +0.0 or -0.0
+                let mut bits = self.to_bits();
+                bits += pos_mask.to_int().cast();
+                bits += (-neg_mask.to_int()).cast();
+                // Patch the zero lanes so that both +0.0 and -0.0 end up as
+                // next_down(0.0). The bit patterns for 0.0 and -0.0 are
+                // 000...000 and 100...000, respectively. We want both of these
+                // to result in 100...001.
+                let zero_next_down = (1 << ($uty::BITS - 1)) | 1; // This is 100...001
+                bits |= (zero_mask.to_int().cast()) & Simd::<$uty, LANES>::splat(zero_next_down);
+                Self::from_bits(bits)
+            }
+
             #[inline]
             fn cast_from_int(i: Self::UInt) -> Self {
                 i.cast()
@@ -447,3 +514,143 @@ macro_rules! simd_impl {
 simd_impl!(f32, u32);
 #[cfg(feature = "simd_support")]
 simd_impl!(f64, u64);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    macro_rules! scalar_increase_masked_tests {
+        ($($fname:ident: $value:expr,)*) => {
+            $(
+                #[test]
+                fn $fname() {
+                    let (input, expected) = $value;
+                    assert_eq!(input.increase_masked(true), expected);
+                }
+            )*
+        }
+    }
+
+    scalar_increase_masked_tests! {
+        scalar_increase_masked_case0: (1.0f64, 1.0f64 + f64::EPSILON),
+        scalar_increase_masked_case1: (1.0f32, 1.0f32 + f32::EPSILON),
+        scalar_increase_masked_case2: (0.625f64, 0.625f64 + 0.5f64 * f64::EPSILON),
+        scalar_increase_masked_case3: (0.625f32, 0.625f32 + 0.5f32 * f32::EPSILON),
+        scalar_increase_masked_case4: (f64::from_bits(1), f64::from_bits(2)),
+        scalar_increase_masked_case5: (f32::from_bits(1), f32::from_bits(2)),
+        scalar_increase_masked_case6: (0.0f64, f64::from_bits(1)),
+        scalar_increase_masked_case7: (0.0f32, f32::from_bits(1)),
+    }
+
+    macro_rules! simd_increase_masked_tests {
+        ($($fname:ident: ($ty:ty, $f_scalar:ident),)*) => {
+            $(
+                #[test]
+                #[cfg(feature = "simd_support")]
+                fn $fname() {
+                    let values = [
+                        10.5 as $f_scalar, 1.0 as $f_scalar, 1.0e-3 as $f_scalar, $f_scalar::from_bits(1), 0.0 as $f_scalar
+                    ];
+                    for input0 in values {
+                        let x = <$ty>::splat(input0);
+                        // Create a mask with just the first lane set.
+                        let mut mask = Mask::from_array([false; <$ty>::LANES]);
+                        mask.set(0, true);
+
+                        let y = x.increase_masked(mask);
+
+                        // Independently create the expected result, based on
+                        // applying increase_masked() to the scalar value in lane 0.
+                        let mut xa = x.to_array();
+                        xa[0] = xa[0].increase_masked(true);
+                        let expected = Simd::from_array(xa);
+
+                        assert_eq!(y, expected);
+                    }
+                }
+            )*
+        }
+    }
+
+    simd_increase_masked_tests! {
+        simd_increase_masked_case0: (f32x2, f32),
+        simd_increase_masked_case1: (f32x4, f32),
+        simd_increase_masked_case2: (f32x8, f32),
+        simd_increase_masked_case3: (f32x16, f32),
+        simd_increase_masked_case4: (f64x2, f64),
+        simd_increase_masked_case5: (f64x4, f64),
+        simd_increase_masked_case6: (f64x8, f64),
+    }
+
+    macro_rules! scalar_utils_next_down_tests {
+        ($($fname:ident: $value:expr,)*) => {
+            $(
+                #[test]
+                fn $fname() {
+                    let (input, expected) = $value;
+                    assert_eq!(input.utils_next_down(), expected);
+                }
+            )*
+        }
+    }
+
+    scalar_utils_next_down_tests! {
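+        // Each case is (input, expected), where `expected` is the neighboring
+        // representable value in the direction of -inf.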
+        utils_next_down_case0: (3000.0f32, 2999.9998f32),
+        utils_next_down_case1: (-3000.0f32, -3000.0002f32),
+        utils_next_down_case2: (3000.0f64, 2999.9999999999995f64),
+        utils_next_down_case3: (-3000.0f64, -3000.0000000000005f64),
+        utils_next_down_case4: (1.0f64, 1.0f64 - 0.5f64 * f64::EPSILON),
+        utils_next_down_case5: (1.0f32, 1.0f32 - 0.5f32 * f32::EPSILON),
+        utils_next_down_case6: (-1.0f64, -1.0f64 - f64::EPSILON),
+        utils_next_down_case7: (-1.0f32, -1.0f32 - f32::EPSILON),
+        utils_next_down_case8: (0.625f64, 0.625f64 - 0.5f64 * f64::EPSILON),
+        utils_next_down_case9: (0.625f32, 0.625f32 - 0.5f32 * f32::EPSILON),
+        utils_next_down_case10: (-0.625f64, -0.625f64 - 0.5f64 * f64::EPSILON),
+        utils_next_down_case11: (-0.625f32, -0.625f32 - 0.5f32 * f32::EPSILON),
+        utils_next_down_case12: (f64::from_bits(2), f64::from_bits(1)),
+        utils_next_down_case13: (f32::from_bits(2), f32::from_bits(1)),
+        utils_next_down_case14: (f64::from_bits(1), 0.0f64),
+        utils_next_down_case15: (f32::from_bits(1), 0.0f32),
+        utils_next_down_case16: (0.0f64, -f64::from_bits(1)),
+        utils_next_down_case17: (0.0f32, -f32::from_bits(1)),
+        utils_next_down_case18: (-0.0f64, -f64::from_bits(1)),
+        utils_next_down_case19: (-0.0f32, -f32::from_bits(1)),
+    }
+
+    macro_rules! simd_utils_next_down_tests {
+        ($($fname:ident: ($ty:ty, $f_scalar:ident),)*) => {
+            $(
+                #[test]
+                #[cfg(feature = "simd_support")]
+                fn $fname() {
+                    let values = [
+                        10.5 as $f_scalar, 1.0 as $f_scalar, 1.0e-3 as $f_scalar, $f_scalar::from_bits(1), 0.0 as $f_scalar,
+                        -10.5 as $f_scalar, -1.0 as $f_scalar, -1.0e-3 as $f_scalar, -$f_scalar::from_bits(1), -0.0 as $f_scalar,
+                    ];
+                    // Test that the vector version gives the same results as
+                    // the scalar version. Create test vectors that use two
+                    // values at a time from `values`.
+                    for k in 0..(values.len() - 1) {
+                        let c1 = <$ty>::splat(values[k]);
+                        let c2 = <$ty>::splat(values[k + 1]);
+                        let (x1, _x2) = c1.interleave(c2);
+                        let y1 = x1.utils_next_down();
+                        for i in 0..<$ty>::LANES {
+                            assert_eq!(y1[i], x1[i].utils_next_down());
+                        }
+                    }
+                }
+            )*
+        }
+    }
+
+    simd_utils_next_down_tests! {
+        simd_utils_next_down_case0: (f32x2, f32),
+        simd_utils_next_down_case1: (f32x4, f32),
+        simd_utils_next_down_case2: (f32x8, f32),
+        simd_utils_next_down_case3: (f32x16, f32),
+        simd_utils_next_down_case4: (f64x2, f64),
+        simd_utils_next_down_case5: (f64x4, f64),
+        simd_utils_next_down_case6: (f64x8, f64),
+    }
+}
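
For illustration, a minimal sketch (not part of the patch) of the invariant that
`compute_scale` maintains, exercised on a range only a couple of ULPs wide. It
assumes this branch's in-development API, where `Uniform::new` returns a
`Result` (as the `Error::EmptyRange` / `Error::NonFinite` paths above indicate),
and that `rand::thread_rng` is still available:

    use rand::distributions::{Distribution, Uniform};

    fn main() {
        // A half-open range whose width is two ULPs of 1.0.
        let low = 1.0f64 - 2.0 * f64::EPSILON;
        let high = 1.0f64;
        let dist = Uniform::new(low, high).unwrap();
        let mut rng = rand::thread_rng();
        for _ in 0..1_000_000 {
            let x = dist.sample(&mut rng);
            // Samples must stay strictly below `high`, even though `scale`
            // is chosen as large as possible.
            assert!(low <= x && x < high);
        }
    }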