From 2b005c2adc1f2e40082c398223ff940036c76e45 Mon Sep 17 00:00:00 2001 From: Will Crozier Date: Tue, 29 Sep 2020 22:25:42 +0100 Subject: [PATCH 1/4] Add benchmarks for Combinations Renames tuple_combinations benchmark functions to tuple_comb_* for clarity in test results. --- Cargo.toml | 4 ++ benches/combinations.rs | 125 ++++++++++++++++++++++++++++++++++ benches/tuple_combinations.rs | 48 ++++++------- 3 files changed, 153 insertions(+), 24 deletions(-) create mode 100644 benches/combinations.rs diff --git a/Cargo.toml b/Cargo.toml index 3de576bc3..d1bee6e7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,3 +67,7 @@ harness = false [[bench]] name = "bench1" harness = false + +[[bench]] +name = "combinations" +harness = false diff --git a/benches/combinations.rs b/benches/combinations.rs new file mode 100644 index 000000000..e7433a4cb --- /dev/null +++ b/benches/combinations.rs @@ -0,0 +1,125 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; + +// approximate 100_000 iterations for each combination +const N1: usize = 100_000; +const N2: usize = 448; +const N3: usize = 86; +const N4: usize = 41; +const N14: usize = 21; + +fn comb_for1(c: &mut Criterion) { + c.bench_function("comb for1", move |b| { + b.iter(|| { + for i in 0..N1 { + black_box(vec![i]); + } + }) + }); +} + +fn comb_for2(c: &mut Criterion) { + c.bench_function("comb for2", move |b| { + b.iter(|| { + for i in 0..N2 { + for j in (i + 1)..N2 { + black_box(vec![i, j]); + } + } + }) + }); +} + +fn comb_for3(c: &mut Criterion) { + c.bench_function("comb for3", move |b| { + b.iter(|| { + for i in 0..N3 { + for j in (i + 1)..N3 { + for k in (j + 1)..N3 { + black_box(vec![i, j, k]); + } + } + } + }) + }); +} + +fn comb_for4(c: &mut Criterion) { + c.bench_function("comb for4", move |b| { + b.iter(|| { + for i in 0..N4 { + for j in (i + 1)..N4 { + for k in (j + 1)..N4 { + for l in (k + 1)..N4 { + black_box(vec![i, j, k, l]); + } + } + } + } + }) + }); +} + 
+fn comb_c1(c: &mut Criterion) { + c.bench_function("comb c1", move |b| { + b.iter(|| { + for combo in (0..N1).combinations(1) { + black_box(combo); + } + }) + }); +} + +fn comb_c2(c: &mut Criterion) { + c.bench_function("comb c2", move |b| { + b.iter(|| { + for combo in (0..N2).combinations(2) { + black_box(combo); + } + }) + }); +} + +fn comb_c3(c: &mut Criterion) { + c.bench_function("comb c3", move |b| { + b.iter(|| { + for combo in (0..N3).combinations(3) { + black_box(combo); + } + }) + }); +} + +fn comb_c4(c: &mut Criterion) { + c.bench_function("comb c4", move |b| { + b.iter(|| { + for combo in (0..N4).combinations(4) { + black_box(combo); + } + }) + }); +} + +fn comb_c14(c: &mut Criterion) { + c.bench_function("comb c14", move |b| { + b.iter(|| { + for combo in (0..N14).combinations(14) { + black_box(combo); + } + }) + }); +} + +criterion_group!( + benches, + comb_for1, + comb_for2, + comb_for3, + comb_for4, + comb_c1, + comb_c2, + comb_c3, + comb_c4, + comb_c14, +); +criterion_main!(benches); diff --git a/benches/tuple_combinations.rs b/benches/tuple_combinations.rs index 84411efd8..4e26b282e 100644 --- a/benches/tuple_combinations.rs +++ b/benches/tuple_combinations.rs @@ -7,8 +7,8 @@ const N2: usize = 448; const N3: usize = 86; const N4: usize = 41; -fn comb_for1(c: &mut Criterion) { - c.bench_function("comb for1", move |b| { +fn tuple_comb_for1(c: &mut Criterion) { + c.bench_function("tuple comb for1", move |b| { b.iter(|| { for i in 0..N1 { black_box(i); @@ -17,8 +17,8 @@ fn comb_for1(c: &mut Criterion) { }); } -fn comb_for2(c: &mut Criterion) { - c.bench_function("comb for2", move |b| { +fn tuple_comb_for2(c: &mut Criterion) { + c.bench_function("tuple comb for2", move |b| { b.iter(|| { for i in 0..N2 { for j in (i + 1)..N2 { @@ -29,8 +29,8 @@ fn comb_for2(c: &mut Criterion) { }); } -fn comb_for3(c: &mut Criterion) { - c.bench_function("comb for3", move |b| { +fn tuple_comb_for3(c: &mut Criterion) { + c.bench_function("tuple comb for3", move |b| { 
b.iter(|| { for i in 0..N3 { for j in (i + 1)..N3 { @@ -43,8 +43,8 @@ fn comb_for3(c: &mut Criterion) { }); } -fn comb_for4(c: &mut Criterion) { - c.bench_function("comb for4", move |b| { +fn tuple_comb_for4(c: &mut Criterion) { + c.bench_function("tuple comb for4", move |b| { b.iter(|| { for i in 0..N4 { for j in (i + 1)..N4 { @@ -59,8 +59,8 @@ fn comb_for4(c: &mut Criterion) { }); } -fn comb_c1(c: &mut Criterion) { - c.bench_function("comb c1", move |b| { +fn tuple_comb_c1(c: &mut Criterion) { + c.bench_function("tuple comb c1", move |b| { b.iter(|| { for (i,) in (0..N1).tuple_combinations() { black_box(i); @@ -69,8 +69,8 @@ fn comb_c1(c: &mut Criterion) { }); } -fn comb_c2(c: &mut Criterion) { - c.bench_function("comb c2", move |b| { +fn tuple_comb_c2(c: &mut Criterion) { + c.bench_function("tuple comb c2", move |b| { b.iter(|| { for (i, j) in (0..N2).tuple_combinations() { black_box(i + j); @@ -79,8 +79,8 @@ fn comb_c2(c: &mut Criterion) { }); } -fn comb_c3(c: &mut Criterion) { - c.bench_function("comb c3", move |b| { +fn tuple_comb_c3(c: &mut Criterion) { + c.bench_function("tuple comb c3", move |b| { b.iter(|| { for (i, j, k) in (0..N3).tuple_combinations() { black_box(i + j + k); @@ -89,8 +89,8 @@ fn comb_c3(c: &mut Criterion) { }); } -fn comb_c4(c: &mut Criterion) { - c.bench_function("comb c4", move |b| { +fn tuple_comb_c4(c: &mut Criterion) { + c.bench_function("tuple comb c4", move |b| { b.iter(|| { for (i, j, k, l) in (0..N4).tuple_combinations() { black_box(i + j + k + l); @@ -101,13 +101,13 @@ fn comb_c4(c: &mut Criterion) { criterion_group!( benches, - comb_for1, - comb_for2, - comb_for3, - comb_for4, - comb_c1, - comb_c2, - comb_c3, - comb_c4, + tuple_comb_for1, + tuple_comb_for2, + tuple_comb_for3, + tuple_comb_for4, + tuple_comb_c1, + tuple_comb_c2, + tuple_comb_c3, + tuple_comb_c4, ); criterion_main!(benches); From 8cdf928fd4b0e20ddf5d91220f8f2a8ae3b6dd50 Mon Sep 17 00:00:00 2001 From: Will Crozier Date: Sun, 27 Sep 2020 02:21:12 +0100 Subject: 
[PATCH 2/4] FEAT: Powerset iterator adaptor An iterator to iterate through the powerset of the elements from an iterator. --- src/combinations.rs | 46 ++++++++++++++++++++++++++++++------- src/lazy_buffer.rs | 15 ++++++++---- src/lib.rs | 40 ++++++++++++++++++++++++++++++++ src/powerset.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++ tests/quick.rs | 7 ++++++ tests/test_std.rs | 17 ++++++++++++++ 6 files changed, 169 insertions(+), 12 deletions(-) create mode 100644 src/powerset.rs diff --git a/src/combinations.rs b/src/combinations.rs index 9231a7b41..31fb5e597 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -31,13 +31,8 @@ impl fmt::Debug for Combinations pub fn combinations(iter: I, k: usize) -> Combinations where I: Iterator { - let mut pool: LazyBuffer = LazyBuffer::new(iter); - - for _ in 0..k { - if !pool.get_next() { - break; - } - } + let mut pool = LazyBuffer::new(iter); + pool.prefill(k); Combinations { indices: (0..k).collect(), @@ -46,6 +41,41 @@ pub fn combinations(iter: I, k: usize) -> Combinations } } +impl Combinations { + /// Returns the length of a combination produced by this iterator. + #[inline] + pub fn k(&self) -> usize { self.indices.len() } + + /// Returns the (current) length of the pool from which combination elements are + /// selected. This value can change between invocations of [`next`]. + /// + /// [`next`]: #method.next + #[inline] + pub fn n(&self) -> usize { self.pool.len() } + + /// Resets this `Combinations` back to an initial state for combinations of length + /// `k` over the same pool data source. If `k` is larger than the current length + /// of the data pool an attempt is made to prefill the pool so that it holds `k` + /// elements. 
+ pub(crate) fn reset(&mut self, k: usize) { + self.first = true; + + if k < self.indices.len() { + self.indices.truncate(k); + for i in 0..k { + self.indices[i] = i; + } + + } else { + for i in 0..self.indices.len() { + self.indices[i] = i; + } + self.indices.extend(self.indices.len()..k); + self.pool.prefill(k); + } + } +} + impl Iterator for Combinations where I: Iterator, I::Item: Clone @@ -53,7 +83,7 @@ impl Iterator for Combinations type Item = Vec; fn next(&mut self) -> Option { if self.first { - if self.pool.is_done() { + if self.k() > self.n() { return None; } self.first = false; diff --git a/src/lazy_buffer.rs b/src/lazy_buffer.rs index 931755aa5..fa514ec2d 100644 --- a/src/lazy_buffer.rs +++ b/src/lazy_buffer.rs @@ -24,10 +24,6 @@ where self.buffer.len() } - pub fn is_done(&self) -> bool { - self.done - } - pub fn get_next(&mut self) -> bool { if self.done { return false; @@ -44,6 +40,17 @@ where } } } + + pub fn prefill(&mut self, len: usize) { + let buffer_len = self.buffer.len(); + + if !self.done && len > buffer_len { + let delta = len - buffer_len; + + self.buffer.extend(self.it.by_ref().take(delta)); + self.done = self.buffer.len() < len; + } + } } impl Index for LazyBuffer diff --git a/src/lib.rs b/src/lib.rs index 9ce015209..9dd0e12c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -134,6 +134,8 @@ pub mod structs { pub use crate::permutations::Permutations; pub use crate::process_results_impl::ProcessResults; #[cfg(feature = "use_alloc")] + pub use crate::powerset::Powerset; + #[cfg(feature = "use_alloc")] pub use crate::put_back_n_impl::PutBackN; #[cfg(feature = "use_alloc")] pub use crate::rciter_impl::RcIter; @@ -207,6 +209,8 @@ mod peek_nth; mod peeking_take_while; #[cfg(feature = "use_alloc")] mod permutations; +#[cfg(feature = "use_alloc")] +mod powerset; mod process_results_impl; #[cfg(feature = "use_alloc")] mod put_back_n_impl; @@ -1406,6 +1410,42 @@ pub trait Itertools : Iterator { permutations::permutations(self, k) } + /// Return an 
iterator that iterates through the powerset of the elements from an + /// iterator. + /// + /// Iterator element type is `Vec`. The iterator produces a new `Vec` + /// per iteration, and clones the iterator elements. + /// + /// The powerset of a set contains all subsets including the empty set and the full + /// input set. A powerset has length _2^n_ where _n_ is the length of the input + /// set. + /// + /// Each `Vec` produced by this iterator represents a subset of the elements + /// produced by the source iterator. + /// + /// ``` + /// use itertools::Itertools; + /// + /// let sets = (1..4).powerset().collect::>(); + /// itertools::assert_equal(sets, vec![ + /// vec![], + /// vec![1], + /// vec![2], + /// vec![3], + /// vec![1, 2], + /// vec![1, 3], + /// vec![2, 3], + /// vec![1, 2, 3], + /// ]); + /// ``` + #[cfg(feature = "use_alloc")] + fn powerset(self) -> Powerset + where Self: Sized, + Self::Item: Clone, + { + powerset::powerset(self) + } + /// Return an iterator adaptor that pads the sequence to a minimum length of /// `min` by filling missing elements using a closure `f`. /// diff --git a/src/powerset.rs b/src/powerset.rs new file mode 100644 index 000000000..df42ff514 --- /dev/null +++ b/src/powerset.rs @@ -0,0 +1,56 @@ +use std::fmt; +use alloc::vec::Vec; + +use super::combinations::{Combinations, combinations}; + +/// An iterator to iterate through the powerset of the elements from an iterator. +/// +/// See [`.powerset()`](../trait.Itertools.html#method.powerset) for more +/// information. +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +pub struct Powerset { + combs: Combinations, +} + +impl Clone for Powerset + where I: Clone + Iterator, + I::Item: Clone, +{ + clone_fields!(combs); +} + +impl fmt::Debug for Powerset + where I: Iterator + fmt::Debug, + I::Item: fmt::Debug, +{ + debug_fmt_fields!(Powerset, combs); +} + +/// Create a new `Powerset` from a clonable iterator. 
+pub fn powerset(src: I) -> Powerset + where I: Iterator, + I::Item: Clone, +{ + Powerset { combs: combinations(src, 0) } +} + +impl Iterator for Powerset + where + I: Iterator, + I::Item: Clone, +{ + type Item = Vec; + + fn next(&mut self) -> Option { + if let Some(elt) = self.combs.next() { + Some(elt) + } else if self.combs.k() < self.combs.n() + || self.combs.k() == 0 + { + self.combs.reset(self.combs.k() + 1); + self.combs.next() + } else { + None + } + } +} diff --git a/tests/quick.rs b/tests/quick.rs index ff05a478c..4c54fc286 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -907,6 +907,13 @@ quickcheck! { } } +quickcheck! { + fn size_powerset(it: Iter) -> bool { + // Powerset cardinality gets large very quickly, limit input to keep test fast. + correct_size_hint(it.take(12).powerset()) + } +} + quickcheck! { fn size_unique(it: Iter) -> bool { correct_size_hint(it.unique()) diff --git a/tests/test_std.rs b/tests/test_std.rs index 24d0f0888..a9f9faeb4 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -764,6 +764,23 @@ fn combinations_with_replacement() { ); } +#[test] +fn powerset() { + it::assert_equal((0..0).powerset(), vec![vec![]]); + it::assert_equal((0..1).powerset(), vec![vec![], vec![0]]); + it::assert_equal((0..2).powerset(), vec![vec![], vec![0], vec![1], vec![0, 1]]); + it::assert_equal((0..3).powerset(), vec![ + vec![], + vec![0], vec![1], vec![2], + vec![0, 1], vec![0, 2], vec![1, 2], + vec![0, 1, 2] + ]); + + assert_eq!((0..4).powerset().count(), 1 << 4); + assert_eq!((0..8).powerset().count(), 1 << 8); + assert_eq!((0..16).powerset().count(), 1 << 16); +} + #[test] fn diff_mismatch() { let a = vec![1, 2, 3, 4]; From 8c5d32c1b75aa68da9bb2c5fb32a11d9a2ac1956 Mon Sep 17 00:00:00 2001 From: Will Crozier Date: Tue, 29 Sep 2020 22:34:15 +0100 Subject: [PATCH 3/4] Add benchmarks for Powerset --- Cargo.toml | 4 ++++ benches/powerset.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 
benches/powerset.rs diff --git a/Cargo.toml b/Cargo.toml index d1bee6e7c..c6131625e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,3 +71,7 @@ harness = false [[bench]] name = "combinations" harness = false + +[[bench]] +name = "powerset" +harness = false diff --git a/benches/powerset.rs b/benches/powerset.rs new file mode 100644 index 000000000..074550bc4 --- /dev/null +++ b/benches/powerset.rs @@ -0,0 +1,44 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; + +// Keep aggregate generated elements the same, regardless of powerset length. +const TOTAL_ELEMENTS: usize = 1 << 12; +const fn calc_iters(n: usize) -> usize { + TOTAL_ELEMENTS / (1 << n) +} + +fn powerset_n(c: &mut Criterion, n: usize) { + let id = format!("powerset {}", n); + c.bench_function(id.as_str(), move |b| { + b.iter(|| { + for _ in 0..calc_iters(n) { + for elt in (0..n).powerset() { + black_box(elt); + } + } + }) + }); +} + +fn powerset_0(c: &mut Criterion) { powerset_n(c, 0); } + +fn powerset_1(c: &mut Criterion) { powerset_n(c, 1); } + +fn powerset_2(c: &mut Criterion) { powerset_n(c, 2); } + +fn powerset_4(c: &mut Criterion) { powerset_n(c, 4); } + +fn powerset_8(c: &mut Criterion) { powerset_n(c, 8); } + +fn powerset_12(c: &mut Criterion) { powerset_n(c, 12); } + +criterion_group!( + benches, + powerset_0, + powerset_1, + powerset_2, + powerset_4, + powerset_8, + powerset_12, +); +criterion_main!(benches); \ No newline at end of file From 83c0f046c077a71185042e020a04262f388a5157 Mon Sep 17 00:00:00 2001 From: Will Crozier Date: Sat, 26 Sep 2020 15:55:36 +0100 Subject: [PATCH 4/4] Add Iterator::size_hint() method impl. 
for Powerset --- src/combinations.rs | 4 ++++ src/powerset.rs | 35 +++++++++++++++++++++++++++++++---- src/size_hint.rs | 15 +++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/combinations.rs b/src/combinations.rs index 31fb5e597..e6ba4ac29 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -53,6 +53,10 @@ impl Combinations { #[inline] pub fn n(&self) -> usize { self.pool.len() } + /// Returns a reference to the source iterator. + #[inline] + pub(crate) fn src(&self) -> &I { &self.pool.it } + /// Resets this `Combinations` back to an initial state for combinations of length /// `k` over the same pool data source. If `k` is larger than the current length /// of the data pool an attempt is made to prefill the pool so that it holds `k` diff --git a/src/powerset.rs b/src/powerset.rs index df42ff514..ef17752b3 100644 --- a/src/powerset.rs +++ b/src/powerset.rs @@ -1,7 +1,9 @@ use std::fmt; +use std::usize; use alloc::vec::Vec; use super::combinations::{Combinations, combinations}; +use super::size_hint; /// An iterator to iterate through the powerset of the elements from an iterator. /// @@ -10,20 +12,22 @@ use super::combinations::{Combinations, combinations}; #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] pub struct Powerset { combs: Combinations, + // Iterator `position` (equal to count of yielded elements). + pos: usize, } impl Clone for Powerset where I: Clone + Iterator, I::Item: Clone, { - clone_fields!(combs); + clone_fields!(combs, pos); } impl fmt::Debug for Powerset where I: Iterator + fmt::Debug, I::Item: fmt::Debug, { - debug_fmt_fields!(Powerset, combs); + debug_fmt_fields!(Powerset, combs, pos); } /// Create a new `Powerset` from a clonable iterator. 
@@ -31,7 +35,10 @@ pub fn powerset(src: I) -> Powerset where I: Iterator, I::Item: Clone, { - Powerset { combs: combinations(src, 0) } + Powerset { + combs: combinations(src, 0), + pos: 0, + } } impl Iterator for Powerset @@ -43,14 +50,34 @@ impl Iterator for Powerset fn next(&mut self) -> Option { if let Some(elt) = self.combs.next() { + self.pos = self.pos.saturating_add(1); Some(elt) } else if self.combs.k() < self.combs.n() || self.combs.k() == 0 { self.combs.reset(self.combs.k() + 1); - self.combs.next() + self.combs.next().map(|elt| { + self.pos = self.pos.saturating_add(1); + elt + }) } else { None } } + + fn size_hint(&self) -> (usize, Option) { + // Total bounds for source iterator. + let src_total = size_hint::add_scalar(self.combs.src().size_hint(), self.combs.n()); + + // Total bounds for self: length(powerset(set)) == 2 ^ length(set). + let self_total = size_hint::pow_scalar_base(2, src_total); + + if self.pos < usize::MAX { + // Subtract count of elements already yielded from total. + size_hint::sub_scalar(self_total, self.pos) + } else { + // Fallback: self.pos is saturated and no longer reliable. + (0, self_total.1) + } + } } diff --git a/src/size_hint.rs b/src/size_hint.rs index 9d9b8b8a1..1168ecaa3 100644 --- a/src/size_hint.rs +++ b/src/size_hint.rs @@ -3,6 +3,7 @@ use std::usize; use std::cmp; +use std::u32; /// **SizeHint** is the return type of **Iterator::size_hint()**. pub type SizeHint = (usize, Option); @@ -74,6 +75,20 @@ pub fn mul_scalar(sh: SizeHint, x: usize) -> SizeHint { (low, hi) } +/// Raise `base` correctly by a **`SizeHint`** exponent. 
+#[inline] +pub fn pow_scalar_base(base: usize, exp: SizeHint) -> SizeHint { + let exp_low = cmp::min(exp.0, u32::MAX as usize) as u32; + let low = base.saturating_pow(exp_low); + + let hi = exp.1.and_then(|exp| { + let exp_hi = cmp::min(exp, u32::MAX as usize) as u32; + base.checked_pow(exp_hi) + }); + + (low, hi) +} + /// Return the maximum #[inline] pub fn max(a: SizeHint, b: SizeHint) -> SizeHint {