Update implementation chosen by sample_indices

rust-random · Jun 1, 2018 · cad42dd · cad42dd
1 parent 5e0a7ac
commit cad42dd
Showing 1 changed file with 79 additions and 24 deletions.
diff --git a/src/seq.rs b/src/seq.rs
@@ -126,36 +126,49 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) ->
 ///
 /// The values are non-repeating and in random order.
 ///
-/// This implementation uses `O(amount)` time and memory.
+/// This method is used internally by the slice sampling methods, but it can
+/// sometimes be useful to have the indices themselves so this is provided as
+/// an alternative.
 ///
-/// This method is used internally by the slice sampling methods, but it can sometimes be useful to
-/// have the indices themselves so this is provided as an alternative.
+/// The implementation used is not specified; we automatically select the
+/// fastest available implementation. Roughly speaking, complexity is
+/// `O(amount)` if `amount` is small relative to `length`, otherwise `O(length)`.
 ///
-/// Panics if `amount > length`
+/// Panics if `amount > length`; with overflow checks enabled, may also panic for
+/// very large `amount` or `length` (when `36*length` or `2720*amount` overflows `usize`).
 pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
     where R: Rng + ?Sized,
 {
     if amount > length {
         panic!("`amount` must be less than or equal to `slice.len()`");
     }
-
-    // We are going to have to allocate at least `amount` for the output no matter what. However,
-    // if we use the `cached` version we will have to allocate `amount` as a HashMap as well since
-    // it inserts an element for every loop.
-    //
-    // Therefore, if `amount >= length / 2` then inplace will be both faster and use less memory.
-    // In fact, benchmarks show the inplace version is faster for length up to about 20 times
-    // faster than amount.
-    //
-    // TODO: there is probably even more fine-tuning that can be done here since
-    // `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice,
-    // and a trade off could probably be made between memory/cpu, since hashmap operations
-    // are slower than array index swapping.
-    if amount >= length / 20 {
-        sample_indices_inplace(rng, length as u32, amount as u32)
-            .into_iter().map(|x| x as usize).collect()
+    
+    // Choice of algorithm here depends on both length and amount. See:
+    // https://github.com/rust-lang-nursery/rand/pull/479
+
+    if amount < 517 {
+        const C: [[usize; 2]; 2] = [[1, 36], [200, 440]];
+        let j = if length < 500_000 { 0 } else { 1 };
+        let m4 = 4 * amount;
+        if C[0][j] * length < (C[1][j] + m4) * amount {
+            sample_indices_inplace(rng, length as u32, amount as u32)
+                .into_iter()
+                .map(|x| x as usize)
+                .collect()
+        } else {
+            sample_indices_floyd(rng, length, amount)
+        }
     } else {
-        sample_indices_cache(rng, length, amount)
+        const C: [[usize; 2]; 2] = [[1, 36], [62*40, 68*40]];
+        let j = if length < 500_000 { 0 } else { 1 };
+        if C[0][j] * length < C[1][j] * amount {
+            sample_indices_inplace(rng, length as u32, amount as u32)
+                .into_iter()
+                .map(|x| x as usize)
+                .collect()
+        } else {
+            sample_indices_cache(rng, length, amount)
+        }
     }
 }
 
@@ -172,7 +185,7 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize
 /// This is likely the fastest for small lengths since it avoids the need for
 /// allocations. Set-up is `O(length)` time and memory and shuffling is
 /// `O(amount)` time.
-pub fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32)
+fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32)
     -> Vec<u32>
     where R: Rng + ?Sized,
 {
@@ -193,7 +206,7 @@ pub fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32)
 /// combination algorithm.
 ///
 /// This implementation uses `O(amount)` memory and `O(amount^2)` time.
-pub fn sample_indices_floyd<R>(rng: &mut R, length: usize, amount: usize)
+fn sample_indices_floyd<R>(rng: &mut R, length: usize, amount: usize)
     -> Vec<usize>
     where R: Rng + ?Sized,
 {
@@ -215,7 +228,7 @@ pub fn sample_indices_floyd<R>(rng: &mut R, length: usize, amount: usize)
 /// especially useful when `amount <<< length`; e.g. selecting 3 non-repeating
 /// values from `1_000_000`. The algorithm is `O(amount)` time and memory,
 /// but due to overheads will often be slower than other approaches.
-pub fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize)
+fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize)
     -> Vec<usize>
     where R: Rng + ?Sized,
 {
@@ -359,4 +372,46 @@ mod test {
             }
         }
     }
+
+    #[test]
+    fn test_sample_alg() {
+        let xor_rng = XorShiftRng::from_seed;
+
+        let mut r = ::test::rng(403);
+        let mut seed = [0u8; 16];
+
+        // We can't observe directly which algorithm is used, but given the same
+        // seed and parameters each algorithm should produce a different sample.
+
+        // A small length and relatively large amount should use inplace
+        r.fill(&mut seed);
+        let (length, amount) = (100, 50);
+        let v1 = sample_indices(&mut xor_rng(seed), length, amount);
+        let v2 = sample_indices_inplace(&mut xor_rng(seed),
+            length as u32, amount as u32);
+        assert!(v1.iter().all(|e| *e < length));
+        assert!(v1.iter().zip(v2.iter()).all(|(x,y)| *x == *y as usize));
+
+        // Test other algs do produce different results
+        let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount);
+        let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount);
+        assert!(v1 != v3);
+        assert!(v1 != v4);
+
+        // A large length and small amount should use Floyd
+        r.fill(&mut seed);
+        let (length, amount) = (1<<20, 50);
+        let v1 = sample_indices(&mut xor_rng(seed), length, amount);
+        let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount);
+        assert!(v1.iter().all(|e| *e < length));
+        assert_eq!(v1, v2);
+
+        // A large length and larger amount should use cache
+        r.fill(&mut seed);
+        let (length, amount) = (1<<20, 600);
+        let v1 = sample_indices(&mut xor_rng(seed), length, amount);
+        let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount);
+        assert!(v1.iter().all(|e| *e < length));
+        assert_eq!(v1, v2);
+    }
 }