From 7f6eca4dcbbcbda407e25f0a886e523d0d831fa8 Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Sun, 11 Oct 2020 11:11:16 -0700 Subject: [PATCH 1/3] remove unneccessary admonition from choose docs IteratorRandom::choose also guarantees O(1) performance for slices, so there is no need to recommend an alternative --- src/seq/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 333c5d6da56..65c04b4255c 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -296,9 +296,6 @@ pub trait IteratorRandom: Iterator + Sized { /// depends on size hints. In particular, `Iterator` combinators that don't /// change the values yielded but change the size hints may result in /// `choose` returning different elements. - /// - /// For slices, prefer [`SliceRandom::choose`] which guarantees `O(1)` - /// performance. fn choose(mut self, rng: &mut R) -> Option where R: Rng + ?Sized { let (mut lower, mut upper) = self.size_hint(); From dab98b3f0dfb625c346b7f3bc83b8915ca81b5bd Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Mon, 12 Oct 2020 18:14:38 -0700 Subject: [PATCH 2/3] document the ExactSizeIterator optimization in choose() --- src/seq/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 65c04b4255c..1ce85eaa7f9 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -302,6 +302,9 @@ pub trait IteratorRandom: Iterator + Sized { let mut consumed = 0; let mut result = None; + // Handling for this condition outside the loop allows the optimizer to eliminate the loop + // when the Iterator is an ExactSizeIterator. This has a large performance impact on e.g. + // seq_iter_choose_from_1000. if upper == Some(lower) { return if lower == 0 { None From a37a599168552e5454606d19458c713dd771989a Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Sun, 11 Oct 2020 11:03:26 -0700 Subject: [PATCH 3/3] accurate IteratorRandom::choose() f64 reciprocal is inexact, with residuals on the order of 1 / 2^54. For example: the probability gen_bool(1.0 / 3) == true is: 6004799503160661/18014398509481984 Using gen_range is exact for all values of `consumed`. NOTE: this is a value stability-breaking change fixes #1058 --- src/seq/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 1ce85eaa7f9..4dea058c2db 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -336,8 +336,7 @@ pub trait IteratorRandom: Iterator + Sized { return result; } consumed += 1; - let denom = consumed as f64; // accurate to 2^53 elements - if rng.gen_bool(1.0 / denom) { + if gen_index(rng, consumed) == 0 { result = elem; } } @@ -963,7 +962,7 @@ mod test { assert_eq!(choose([].iter().cloned()), None); assert_eq!(choose(0..100), Some(33)); - assert_eq!(choose(UnhintedIterator { iter: 0..100 }), Some(76)); + assert_eq!(choose(UnhintedIterator { iter: 0..100 }), Some(40)); assert_eq!( choose(ChunkHintedIterator { iter: 0..100,