diff --git a/benches/distributions.rs b/benches/distributions.rs index ee4a039c13c..af6ea0015f0 100644 --- a/benches/distributions.rs +++ b/benches/distributions.rs @@ -149,6 +149,11 @@ distr_int!(distr_uniform_i16, i16, Uniform::new(-500i16, 2000)); distr_int!(distr_uniform_i32, i32, Uniform::new(-200_000_000i32, 800_000_000)); distr_int!(distr_uniform_i64, i64, Uniform::new(3i64, 123_456_789_123)); distr_int!(distr_uniform_i128, i128, Uniform::new(-123_456_789_123i128, 123_456_789_123_456_789)); +distr_int!(distr_uniform_usize16, usize, Uniform::new(0usize, 0xb9d7)); +distr_int!(distr_uniform_usize32, usize, Uniform::new(0usize, 0x548c0f43)); +#[cfg(target_pointer_width = "64")] +distr_int!(distr_uniform_usize64, usize, Uniform::new(0usize, 0x3a42714f2bf927a8)); +distr_int!(distr_uniform_isize, isize, Uniform::new(-1060478432isize, 1858574057)); distr_float!(distr_uniform_f32, f32, Uniform::new(2.26f32, 2.319)); distr_float!(distr_uniform_f64, f64, Uniform::new(2.26f64, 2.319)); diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs index b8559d36280..c1a1b2594e0 100644 --- a/src/distributions/uniform.rs +++ b/src/distributions/uniform.rs @@ -246,14 +246,11 @@ pub trait UniformSampler: Sized { /// Sample a single value uniformly from a range with inclusive lower bound /// and exclusive upper bound `[low, high)`. /// - /// Usually users should not call this directly but instead use - /// `Uniform::sample_single`, which asserts that `low < high` before calling - /// this. - /// - /// Via this method, implementations can provide a method optimized for - /// sampling only a single value from the specified range. The default - /// implementation simply calls `UniformSampler::new` then `sample` on the - /// result. + /// By default this is implemented using + /// `UniformSampler::new(low, high).sample(rng)`. However, for some types + /// more optimal implementations for single usage may be provided via this + /// method (which is the case for integers and floats). + /// Results may not be identical. fn sample_single(low: B1, high: B2, rng: &mut R) -> Self::X where B1: SampleBorrow + Sized, @@ -309,31 +306,29 @@ impl<'a, Borrowed> SampleBorrow for &'a Borrowed where Borrowed: Sampl /// /// # Implementation notes /// +/// For simplicity, we use the same generic struct `UniformInt` for all +/// integer types `X`. This gives us only one field type, `X`; to store unsigned +/// values of this size, we take use the fact that these conversions are no-ops. +/// /// For a closed range, the number of possible numbers we should generate is -/// `range = (high - low + 1)`. It is not possible to end up with a uniform -/// distribution if we map *all* the random integers that can be generated to -/// this range. We have to map integers from a `zone` that is a multiple of the -/// range. The rest of the integers, that cause a bias, are rejected. +/// `range = (high - low + 1)`. To avoid bias, we must ensure that the size of +/// our sample space, `zone`, is a multiple of `range`; other values must be +/// rejected (by replacing with a new random sample). /// -/// The problem with `range` is that to cover the full range of the type, it has -/// to store `unsigned_max + 1`, which can't be represented. But if the range -/// covers the full range of the type, no modulus is needed. A range of size 0 -/// can't exist, so we use that to represent this special case. Wrapping -/// arithmetic even makes representing `unsigned_max + 1` as 0 simple. +/// As a special case, we use `range = 0` to represent the full range of the +/// result type (i.e. for `new_inclusive($ty::MIN, $ty::MAX)`). /// -/// We don't calculate `zone` directly, but first calculate the number of -/// integers to reject. To handle `unsigned_max + 1` not fitting in the type, -/// we use: -/// `ints_to_reject = (unsigned_max + 1) % range;` -/// `ints_to_reject = (unsigned_max - range + 1) % range;` +/// The optimum `zone` is the largest product of `range` which fits in our +/// (unsigned) target type. We calculate this by calculating how many numbers we +/// must reject: `reject = (MAX + 1) % range = (MAX - range + 1) % range`. Any (large) +/// product of `range` will suffice, thus in `sample_single` we multiply by a +/// power of 2 via bit-shifting (faster but may cause more rejections). /// -/// The smallest integer PRNGs generate is `u32`. That is why for small integer -/// sizes (`i8`/`u8` and `i16`/`u16`) there is an optimization: don't pick the -/// largest zone that can fit in the small type, but pick the largest zone that -/// can fit in an `u32`. `ints_to_reject` is always less than half the size of -/// the small integer. This means the first bit of `zone` is always 1, and so -/// are all the other preceding bits of a larger integer. The easiest way to -/// grow the `zone` for the larger type is to simply sign extend it. +/// The smallest integer PRNGs generate is `u32`. For 8- and 16-bit outputs we +/// use `u32` for our `zone` and samples (because it's not slower and because +/// it reduces the chance of having to reject a sample). In this case we cannot +/// store `zone` in the target type since it is too large, however we know +/// `ints_to_reject < range <= $unsigned::MAX`. /// /// An alternative to using a modulus is widening multiply: After a widening /// multiply by `range`, the result is in the high word. Then comparing the low @@ -342,12 +337,11 @@ impl<'a, Borrowed> SampleBorrow for &'a Borrowed where Borrowed: Sampl pub struct UniformInt { low: X, range: X, - zone: X, + z: X, // either ints_to_reject or zone depending on implementation } macro_rules! uniform_int_impl { - ($ty:ty, $signed:ty, $unsigned:ident, - $i_large:ident, $u_large:ident) => { + ($ty:ty, $unsigned:ident, $u_large:ident) => { impl SampleUniform for $ty { type Sampler = UniformInt<$ty>; } @@ -382,34 +376,30 @@ macro_rules! uniform_int_impl { let high = *high_b.borrow(); assert!(low <= high, "Uniform::new_inclusive called with `low > high`"); - let unsigned_max = ::core::$unsigned::MAX; + let unsigned_max = ::core::$u_large::MAX; let range = high.wrapping_sub(low).wrapping_add(1) as $unsigned; let ints_to_reject = if range > 0 { + let range = range as $u_large; (unsigned_max - range + 1) % range } else { 0 }; - let zone = unsigned_max - ints_to_reject; UniformInt { low: low, // These are really $unsigned values, but store as $ty: range: range as $ty, - zone: zone as $ty + z: ints_to_reject as $unsigned as $ty } } fn sample(&self, rng: &mut R) -> Self::X { let range = self.range as $unsigned as $u_large; if range > 0 { - // Grow `zone` to fit a type of at least 32 bits, by - // sign-extending it (the first bit is always 1, so are all - // the preceding bits of the larger type). - // For types that already have the right size, all the - // casting is a no-op. - let zone = self.zone as $signed as $i_large as $u_large; + let unsigned_max = ::core::$u_large::MAX; + let zone = unsigned_max - (self.z as $unsigned as $u_large); loop { let v: $u_large = rng.gen(); let (hi, lo) = v.wmul(range); @@ -431,7 +421,7 @@ macro_rules! uniform_int_impl { let low = *low_b.borrow(); let high = *high_b.borrow(); assert!(low < high, - "Uniform::sample_single called with low >= high"); + "UniformSampler::sample_single: low >= high"); let range = high.wrapping_sub(low) as $unsigned as $u_large; let zone = if ::core::$unsigned::MAX <= ::core::u16::MAX as $unsigned { @@ -459,20 +449,20 @@ macro_rules! uniform_int_impl { } } -uniform_int_impl! { i8, i8, u8, i32, u32 } -uniform_int_impl! { i16, i16, u16, i32, u32 } -uniform_int_impl! { i32, i32, u32, i32, u32 } -uniform_int_impl! { i64, i64, u64, i64, u64 } +uniform_int_impl! { i8, u8, u32 } +uniform_int_impl! { i16, u16, u32 } +uniform_int_impl! { i32, u32, u32 } +uniform_int_impl! { i64, u64, u64 } #[cfg(all(rustc_1_26, not(target_os = "emscripten")))] -uniform_int_impl! { i128, i128, u128, u128, u128 } -uniform_int_impl! { isize, isize, usize, isize, usize } -uniform_int_impl! { u8, i8, u8, i32, u32 } -uniform_int_impl! { u16, i16, u16, i32, u32 } -uniform_int_impl! { u32, i32, u32, i32, u32 } -uniform_int_impl! { u64, i64, u64, i64, u64 } -uniform_int_impl! { usize, isize, usize, isize, usize } +uniform_int_impl! { i128, u128, u128 } +uniform_int_impl! { isize, usize, usize } +uniform_int_impl! { u8, u8, u32 } +uniform_int_impl! { u16, u16, u32 } +uniform_int_impl! { u32, u32, u32 } +uniform_int_impl! { u64, u64, u64 } +uniform_int_impl! { usize, usize, usize } #[cfg(all(rustc_1_26, not(target_os = "emscripten")))] -uniform_int_impl! { u128, u128, u128, i128, u128 } +uniform_int_impl! { u128, u128, u128 } #[cfg(all(feature = "simd_support", feature = "nightly"))] macro_rules! uniform_simd_int_impl { @@ -534,13 +524,13 @@ macro_rules! uniform_simd_int_impl { low: low, // These are really $unsigned values, but store as $ty: range: range.cast(), - zone: zone.cast(), + z: zone.cast(), } } fn sample(&self, rng: &mut R) -> Self::X { let range: $unsigned = self.range.cast(); - let zone: $unsigned = self.zone.cast(); + let zone: $unsigned = self.z.cast(); // This might seem very slow, generating a whole new // SIMD vector for every sample rejection. For most uses @@ -736,7 +726,7 @@ macro_rules! uniform_float_impl { let low = *low_b.borrow(); let high = *high_b.borrow(); assert!(low.all_lt(high), - "Uniform::sample_single called with low >= high"); + "UniformSampler::sample_single: low >= high"); let mut scale = high - low; loop { @@ -787,7 +777,7 @@ macro_rules! uniform_float_impl { let mask = !scale.finite_mask(); if mask.any() { assert!(low.all_finite() && high.all_finite(), - "Uniform::sample_single called with non-finite boundaries"); + "Uniform::sample_single: low and high must be finite"); scale = scale.decrease_masked(mask); } } diff --git a/src/distributions/weighted/alias_method.rs b/src/distributions/weighted/alias_method.rs index fef4b619b0d..2e58d84ceca 100644 --- a/src/distributions/weighted/alias_method.rs +++ b/src/distributions/weighted/alias_method.rs @@ -25,10 +25,10 @@ use Rng; /// Given that `n` is the number of items in the vector used to create an /// [`WeightedIndex`], [`WeightedIndex`] will require `O(n)` amount of /// memory. More specifically it takes up some constant amount of memory plus -/// the vector used to create it and a [`Vec`] with capacity `n`. +/// the vector used to create it and a [`Vec`] with capacity `n`. /// /// Time complexity for the creation of a [`WeightedIndex`] is `O(n)`. -/// Sampling is `O(1)`, it makes a call to [`Uniform::sample`] and a call +/// Sampling is `O(1)`, it makes a call to [`Uniform::sample`] and a call /// to [`Uniform::sample`]. /// /// # Example @@ -56,13 +56,13 @@ use Rng; /// /// [`WeightedIndex`]: crate::distributions::weighted::alias_method::WeightedIndex /// [`Weight`]: crate::distributions::weighted::alias_method::Weight -/// [`Vec`]: Vec -/// [`Uniform::sample`]: Distribution::sample +/// [`Vec`]: Vec +/// [`Uniform::sample`]: Distribution::sample /// [`Uniform::sample`]: Distribution::sample pub struct WeightedIndex { - aliases: Vec, + aliases: Vec, no_alias_odds: Vec, - uniform_index: Uniform, + uniform_index: Uniform, uniform_within_weight_sum: Uniform, } @@ -71,6 +71,7 @@ impl WeightedIndex { /// /// Returns an error if: /// - The vector is empty. + /// - The vector is longer than `u32::MAX`. /// - For any weight `w`: `w < 0` or `w > max` where `max = W::MAX / /// weights.len()`. /// - The sum of weights is zero. @@ -78,9 +79,12 @@ impl WeightedIndex { let n = weights.len(); if n == 0 { return Err(WeightedError::NoItem); + } else if n > ::core::u32::MAX as usize { + return Err(WeightedError::TooMany); } + let n = n as u32; - let max_weight_size = W::try_from_usize_lossy(n) + let max_weight_size = W::try_from_u32_lossy(n) .map(|n| W::MAX / n) .unwrap_or(W::ZERO); if !weights @@ -103,7 +107,7 @@ impl WeightedIndex { } // `weight_sum` would have been zero if `try_from_lossy` causes an error here. - let n_converted = W::try_from_usize_lossy(n).unwrap(); + let n_converted = W::try_from_u32_lossy(n).unwrap(); let mut no_alias_odds = weights; for odds in no_alias_odds.iter_mut() { @@ -119,52 +123,52 @@ impl WeightedIndex { /// be ensured that a single index is only ever in one of them at the /// same time. struct Aliases { - aliases: Vec, - smalls_head: usize, - bigs_head: usize, + aliases: Vec, + smalls_head: u32, + bigs_head: u32, } impl Aliases { - fn new(size: usize) -> Self { + fn new(size: u32) -> Self { Aliases { - aliases: vec![0; size], - smalls_head: ::core::usize::MAX, - bigs_head: ::core::usize::MAX, + aliases: vec![0; size as usize], + smalls_head: ::core::u32::MAX, + bigs_head: ::core::u32::MAX, } } - fn push_small(&mut self, idx: usize) { - self.aliases[idx] = self.smalls_head; + fn push_small(&mut self, idx: u32) { + self.aliases[idx as usize] = self.smalls_head; self.smalls_head = idx; } - fn push_big(&mut self, idx: usize) { - self.aliases[idx] = self.bigs_head; + fn push_big(&mut self, idx: u32) { + self.aliases[idx as usize] = self.bigs_head; self.bigs_head = idx; } - fn pop_small(&mut self) -> usize { + fn pop_small(&mut self) -> u32 { let popped = self.smalls_head; - self.smalls_head = self.aliases[popped]; + self.smalls_head = self.aliases[popped as usize]; popped } - fn pop_big(&mut self) -> usize { + fn pop_big(&mut self) -> u32 { let popped = self.bigs_head; - self.bigs_head = self.aliases[popped]; + self.bigs_head = self.aliases[popped as usize]; popped } fn smalls_is_empty(&self) -> bool { - self.smalls_head == ::core::usize::MAX + self.smalls_head == ::core::u32::MAX } fn bigs_is_empty(&self) -> bool { - self.bigs_head == ::core::usize::MAX + self.bigs_head == ::core::u32::MAX } - fn set_alias(&mut self, idx: usize, alias: usize) { - self.aliases[idx] = alias; + fn set_alias(&mut self, idx: u32, alias: u32) { + self.aliases[idx as usize] = alias; } } @@ -173,9 +177,9 @@ impl WeightedIndex { // Split indices into those with small weights and those with big weights. for (index, &odds) in no_alias_odds.iter().enumerate() { if odds < weight_sum { - aliases.push_small(index); + aliases.push_small(index as u32); } else { - aliases.push_big(index); + aliases.push_big(index as u32); } } @@ -186,9 +190,11 @@ impl WeightedIndex { let b = aliases.pop_big(); aliases.set_alias(s, b); - no_alias_odds[b] = no_alias_odds[b] - weight_sum + no_alias_odds[s]; + no_alias_odds[b as usize] = no_alias_odds[b as usize] + - weight_sum + + no_alias_odds[s as usize]; - if no_alias_odds[b] < weight_sum { + if no_alias_odds[b as usize] < weight_sum { aliases.push_small(b); } else { aliases.push_big(b); @@ -198,10 +204,10 @@ impl WeightedIndex { // The remaining indices should have no alias odds of about 100%. This is due to // numeric accuracy. Otherwise they would be exactly 100%. while !aliases.smalls_is_empty() { - no_alias_odds[aliases.pop_small()] = weight_sum; + no_alias_odds[aliases.pop_small() as usize] = weight_sum; } while !aliases.bigs_is_empty() { - no_alias_odds[aliases.pop_big()] = weight_sum; + no_alias_odds[aliases.pop_big() as usize] = weight_sum; } // Prepare distributions for sampling. Creating them beforehand improves @@ -221,10 +227,10 @@ impl WeightedIndex { impl Distribution for WeightedIndex { fn sample(&self, rng: &mut R) -> usize { let candidate = rng.sample(self.uniform_index); - if rng.sample(&self.uniform_within_weight_sum) < self.no_alias_odds[candidate] { - candidate + if rng.sample(&self.uniform_within_weight_sum) < self.no_alias_odds[candidate as usize] { + candidate as usize } else { - self.aliases[candidate] + self.aliases[candidate as usize] as usize } } } @@ -282,10 +288,10 @@ pub trait Weight: /// Element of `Self` equivalent to 0. const ZERO: Self; - /// Produce an instance of `Self` from a `usize` value, or return `None` if + /// Produce an instance of `Self` from a `u32` value, or return `None` if /// out of range. Loss of precision (where `Self` is a floating point type) /// is acceptable. - fn try_from_usize_lossy(n: usize) -> Option; + fn try_from_u32_lossy(n: u32) -> Option; /// Sums all values in slice `values`. fn sum(values: &[Self]) -> Self { @@ -299,7 +305,7 @@ macro_rules! impl_weight_for_float { const MAX: Self = ::core::$T::MAX; const ZERO: Self = 0.0; - fn try_from_usize_lossy(n: usize) -> Option { + fn try_from_u32_lossy(n: u32) -> Option { Some(n as $T) } @@ -328,9 +334,9 @@ macro_rules! impl_weight_for_int { const MAX: Self = ::core::$T::MAX; const ZERO: Self = 0; - fn try_from_usize_lossy(n: usize) -> Option { + fn try_from_u32_lossy(n: u32) -> Option { let n_converted = n as Self; - if n_converted >= Self::ZERO && n_converted as usize == n { + if n_converted >= Self::ZERO && n_converted as u32 == n { Some(n_converted) } else { None @@ -439,21 +445,21 @@ mod test { where WeightedIndex: fmt::Debug, { - const NUM_WEIGHTS: usize = 10; - const ZERO_WEIGHT_INDEX: usize = 3; + const NUM_WEIGHTS: u32 = 10; + const ZERO_WEIGHT_INDEX: u32 = 3; const NUM_SAMPLES: u32 = 15000; let mut rng = ::test::rng(0x9c9fa0b0580a7031); let weights = { - let mut weights = Vec::with_capacity(NUM_WEIGHTS); + let mut weights = Vec::with_capacity(NUM_WEIGHTS as usize); let random_weight_distribution = ::distributions::Uniform::new_inclusive( W::ZERO, - W::MAX / W::try_from_usize_lossy(NUM_WEIGHTS).unwrap(), + W::MAX / W::try_from_u32_lossy(NUM_WEIGHTS).unwrap(), ); for _ in 0..NUM_WEIGHTS { weights.push(rng.sample(&random_weight_distribution)); } - weights[ZERO_WEIGHT_INDEX] = W::ZERO; + weights[ZERO_WEIGHT_INDEX as usize] = W::ZERO; weights }; let weight_sum = weights.iter().map(|w| *w).sum::(); @@ -463,12 +469,12 @@ mod test { .collect::>(); let weight_distribution = WeightedIndex::new(weights).unwrap(); - let mut counts = vec![0_usize; NUM_WEIGHTS]; + let mut counts = vec![0; NUM_WEIGHTS as usize]; for _ in 0..NUM_SAMPLES { counts[rng.sample(&weight_distribution)] += 1; } - assert_eq!(counts[ZERO_WEIGHT_INDEX], 0); + assert_eq!(counts[ZERO_WEIGHT_INDEX as usize], 0); for (count, expected_count) in counts.into_iter().zip(expected_counts) { let difference = (count as f64 - expected_count).abs(); let max_allowed_difference = NUM_SAMPLES as f64 / NUM_WEIGHTS as f64 * 0.1; diff --git a/src/distributions/weighted/mod.rs b/src/distributions/weighted/mod.rs index 6086e6f7873..660f177f616 100644 --- a/src/distributions/weighted/mod.rs +++ b/src/distributions/weighted/mod.rs @@ -208,6 +208,9 @@ pub enum WeightedError { /// All items in the provided weight collection are zero. AllWeightsZero, + + /// Too many weights are provided (length greater than `u32::MAX`) + TooMany, } impl WeightedError { @@ -216,6 +219,7 @@ impl WeightedError { WeightedError::NoItem => "No weights provided.", WeightedError::InvalidWeight => "A weight is invalid.", WeightedError::AllWeightsZero => "All weights are zero.", + WeightedError::TooMany => "Too many weights (hit u32::MAX)", } } } diff --git a/src/seq/index.rs b/src/seq/index.rs index 79ed6c0ec7e..b6fc81e1bb1 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -16,7 +16,7 @@ #[cfg(feature="std")] use std::collections::{HashSet}; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; -#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; +#[cfg(feature="alloc")] use distributions::{Distribution, Uniform, uniform::SampleUniform}; use Rng; /// A vector of indices. @@ -212,9 +212,7 @@ where R: Rng + ?Sized { if (length as f32) < C[j] * (amount as f32) { sample_inplace(rng, length, amount) } else { - // note: could have a specific u32 impl, but I'm lazy and - // generics don't have usable conversions - sample_rejection(rng, length as usize, amount as usize) + sample_rejection(rng, length, amount) } } } @@ -285,20 +283,36 @@ where R: Rng + ?Sized { IndexVec::from(indices) } +trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform + core::hash::Hash { + fn zero() -> Self; + fn as_usize(self) -> usize; +} +impl UInt for u32 { + #[inline] fn zero() -> Self { 0 } + #[inline] fn as_usize(self) -> usize { self as usize } +} +impl UInt for usize { + #[inline] fn zero() -> Self { 0 } + #[inline] fn as_usize(self) -> usize { self } +} + /// Randomly sample exactly `amount` indices from `0..length`, using rejection /// sampling. /// /// Since `amount <<< length` there is a low chance of a random sample in /// `0..length` being a duplicate. We test for duplicates and resample where /// necessary. The algorithm is `O(amount)` time and memory. -fn sample_rejection(rng: &mut R, length: usize, amount: usize) -> IndexVec -where R: Rng + ?Sized { +/// +/// This function is generic over X primarily so that results are value-stable +/// over 32-bit and 64-bit platforms. +fn sample_rejection(rng: &mut R, length: X, amount: X) -> IndexVec +where R: Rng + ?Sized, IndexVec: From> { debug_assert!(amount < length); - #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); + #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount.as_usize()); #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); - let distr = Uniform::new(0, length); - let mut indices = Vec::with_capacity(amount); - for _ in 0..amount { + let distr = Uniform::new(X::zero(), length); + let mut indices = Vec::with_capacity(amount.as_usize()); + for _ in 0..amount.as_usize() { let mut pos = distr.sample(rng); while !cache.insert(pos) { pos = distr.sample(rng); @@ -306,7 +320,7 @@ where R: Rng + ?Sized { indices.push(pos); } - debug_assert_eq!(indices.len(), amount); + debug_assert_eq!(indices.len(), amount.as_usize()); IndexVec::from(indices) } @@ -322,14 +336,14 @@ mod test { assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0); assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]); - assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0); + assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]); // These algorithms should be fast with big numbers. Test average. - let sum: usize = sample_rejection(&mut r, 1 << 25, 10) + let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32) .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); @@ -368,7 +382,7 @@ mod test { // A large length and larger amount should use cache let (length, amount): (usize, usize) = (1<<20, 600); let v1 = sample(&mut seed_rng(422), length, amount); - let v2 = sample_rejection(&mut seed_rng(422), length, amount); + let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32); assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); } diff --git a/src/seq/mod.rs b/src/seq/mod.rs index e4cca760f47..5ad4e801fbd 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -200,13 +200,13 @@ pub trait IteratorRandom: Iterator + Sized { let mut result = None; if upper == Some(lower) { - return if lower == 0 { None } else { self.nth(rng.gen_range(0, lower)) }; + return if lower == 0 { None } else { self.nth(gen_index(rng, lower)) }; } // Continue until the iterator is exhausted loop { if lower > 1 { - let ix = rng.gen_range(0, lower + consumed); + let ix = gen_index(rng, lower + consumed); let skip; if ix < lower { result = self.nth(ix); @@ -267,7 +267,7 @@ pub trait IteratorRandom: Iterator + Sized { // Continue, since the iterator was not exhausted for (i, elem) in self.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); + let k = gen_index(rng, i + 1 + amount); if let Some(slot) = buf.get_mut(k) { *slot = elem; } @@ -300,7 +300,7 @@ pub trait IteratorRandom: Iterator + Sized { // If the iterator stops once, then so do we. if reservoir.len() == amount { for (i, elem) in self.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); + let k = gen_index(rng, i + 1 + amount); if let Some(slot) = reservoir.get_mut(k) { *slot = elem; } @@ -323,7 +323,7 @@ impl SliceRandom for [T] { if self.is_empty() { None } else { - Some(&self[rng.gen_range(0, self.len())]) + Some(&self[gen_index(rng, self.len())]) } } @@ -333,7 +333,7 @@ impl SliceRandom for [T] { None } else { let len = self.len(); - Some(&mut self[rng.gen_range(0, len)]) + Some(&mut self[gen_index(rng, len)]) } } @@ -390,7 +390,7 @@ impl SliceRandom for [T] { where R: Rng + ?Sized { for i in (1..self.len()).rev() { // invariant: elements with index > i have been locked in place. - self.swap(i, rng.gen_range(0, i + 1)); + self.swap(i, gen_index(rng, i + 1)); } } @@ -408,7 +408,7 @@ impl SliceRandom for [T] { for i in (end..len).rev() { // invariant: elements with index > i have been locked in place. - self.swap(i, rng.gen_range(0, i + 1)); + self.swap(i, gen_index(rng, i + 1)); } let r = self.split_at_mut(end); (r.1, r.0) @@ -451,6 +451,19 @@ impl<'a, S: Index + ?Sized + 'a, T: 'a> ExactSizeIterator } +// Sample a number uniformly between 0 and `ubound`. Uses 32-bit sampling where +// possible, primarily in order to produce the same output on 32-bit and 64-bit +// platforms. +#[inline] +fn gen_index(rng: &mut R, ubound: usize) -> usize { + if ubound <= (core::u32::MAX as usize) { + rng.gen_range(0, ubound as u32) as usize + } else { + rng.gen_range(0, ubound) + } +} + + #[cfg(test)] mod test { use super::*;