From 8cdf928fd4b0e20ddf5d91220f8f2a8ae3b6dd50 Mon Sep 17 00:00:00 2001
From: Will Crozier
Date: Sun, 27 Sep 2020 02:21:12 +0100
Subject: [PATCH] FEAT: Powerset iterator adaptor

An iterator to iterate through the powerset of the elements from an iterator.
---
 src/combinations.rs | 46 ++++++++++++++++++++++++++++++-------
 src/lazy_buffer.rs  | 15 ++++++++----
 src/lib.rs          | 40 ++++++++++++++++++++++++++++++++
 src/powerset.rs     | 56 +++++++++++++++++++++++++++++++++++++++++++++
 tests/quick.rs      |  7 ++++++
 tests/test_std.rs   | 17 ++++++++++++++
 6 files changed, 169 insertions(+), 12 deletions(-)
 create mode 100644 src/powerset.rs

diff --git a/src/combinations.rs b/src/combinations.rs
index 9231a7b41..31fb5e597 100644
--- a/src/combinations.rs
+++ b/src/combinations.rs
@@ -31,13 +31,8 @@ impl<I> fmt::Debug for Combinations<I>
 pub fn combinations<I>(iter: I, k: usize) -> Combinations<I>
     where I: Iterator
 {
-    let mut pool: LazyBuffer<I> = LazyBuffer::new(iter);
-
-    for _ in 0..k {
-        if !pool.get_next() {
-            break;
-        }
-    }
+    let mut pool = LazyBuffer::new(iter);
+    pool.prefill(k);
 
     Combinations {
         indices: (0..k).collect(),
@@ -46,6 +41,41 @@ pub fn combinations<I>(iter: I, k: usize) -> Combinations<I>
     }
 }
 
+impl<I: Iterator> Combinations<I> {
+    /// Returns the length of a combination produced by this iterator.
+    #[inline]
+    pub fn k(&self) -> usize { self.indices.len() }
+
+    /// Returns the (current) length of the pool from which combination elements are
+    /// selected. This value can change between invocations of [`next`].
+    ///
+    /// [`next`]: #method.next
+    #[inline]
+    pub fn n(&self) -> usize { self.pool.len() }
+
+    /// Resets this `Combinations` back to an initial state for combinations of length
+    /// `k` over the same pool data source. If `k` is larger than the current length
+    /// of the data pool an attempt is made to prefill the pool so that it holds `k`
+    /// elements.
+    pub(crate) fn reset(&mut self, k: usize) {
+        self.first = true;
+
+        if k < self.indices.len() {
+            self.indices.truncate(k);
+            for i in 0..k {
+                self.indices[i] = i;
+            }
+
+        } else {
+            for i in 0..self.indices.len() {
+                self.indices[i] = i;
+            }
+            self.indices.extend(self.indices.len()..k);
+            self.pool.prefill(k);
+        }
+    }
+}
+
 impl<I> Iterator for Combinations<I>
     where I: Iterator,
           I::Item: Clone
@@ -53,7 +83,7 @@ impl<I> Iterator for Combinations<I>
     type Item = Vec<I::Item>;
     fn next(&mut self) -> Option<Self::Item> {
         if self.first {
-            if self.pool.is_done() {
+            if self.k() > self.n() {
                 return None;
             }
             self.first = false;
diff --git a/src/lazy_buffer.rs b/src/lazy_buffer.rs
index 931755aa5..fa514ec2d 100644
--- a/src/lazy_buffer.rs
+++ b/src/lazy_buffer.rs
@@ -24,10 +24,6 @@ where
         self.buffer.len()
     }
 
-    pub fn is_done(&self) -> bool {
-        self.done
-    }
-
     pub fn get_next(&mut self) -> bool {
         if self.done {
             return false;
@@ -44,6 +40,17 @@ where
             }
         }
     }
+
+    pub fn prefill(&mut self, len: usize) {
+        let buffer_len = self.buffer.len();
+
+        if !self.done && len > buffer_len {
+            let delta = len - buffer_len;
+
+            self.buffer.extend(self.it.by_ref().take(delta));
+            self.done = self.buffer.len() < len;
+        }
+    }
 }
 
 impl<I, J> Index<J> for LazyBuffer<I>
diff --git a/src/lib.rs b/src/lib.rs
index 9ce015209..9dd0e12c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -134,6 +134,8 @@ pub mod structs {
     pub use crate::permutations::Permutations;
     pub use crate::process_results_impl::ProcessResults;
     #[cfg(feature = "use_alloc")]
+    pub use crate::powerset::Powerset;
+    #[cfg(feature = "use_alloc")]
     pub use crate::put_back_n_impl::PutBackN;
     #[cfg(feature = "use_alloc")]
     pub use crate::rciter_impl::RcIter;
@@ -207,6 +209,8 @@ mod peek_nth;
 mod peeking_take_while;
 #[cfg(feature = "use_alloc")]
 mod permutations;
+#[cfg(feature = "use_alloc")]
+mod powerset;
 mod process_results_impl;
 #[cfg(feature = "use_alloc")]
 mod put_back_n_impl;
@@ -1406,6 +1410,42 @@ pub trait Itertools : Iterator {
         permutations::permutations(self, k)
     }
 
+    /// Return an iterator that iterates through the powerset of the elements from an
+    /// iterator.
+    ///
+    /// Iterator element type is `Vec<Self::Item>`. The iterator produces a new `Vec`
+    /// per iteration, and clones the iterator elements.
+    ///
+    /// The powerset of a set contains all subsets including the empty set and the full
+    /// input set. A powerset has length _2^n_ where _n_ is the length of the input
+    /// set.
+    ///
+    /// Each `Vec` produced by this iterator represents a subset of the elements
+    /// produced by the source iterator.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    ///
+    /// let sets = (1..4).powerset().collect::<Vec<_>>();
+    /// itertools::assert_equal(sets, vec![
+    ///     vec![],
+    ///     vec![1],
+    ///     vec![2],
+    ///     vec![3],
+    ///     vec![1, 2],
+    ///     vec![1, 3],
+    ///     vec![2, 3],
+    ///     vec![1, 2, 3],
+    /// ]);
+    /// ```
+    #[cfg(feature = "use_alloc")]
+    fn powerset(self) -> Powerset<Self>
+        where Self: Sized,
+              Self::Item: Clone,
+    {
+        powerset::powerset(self)
+    }
+
     /// Return an iterator adaptor that pads the sequence to a minimum length of
     /// `min` by filling missing elements using a closure `f`.
     ///
diff --git a/src/powerset.rs b/src/powerset.rs
new file mode 100644
index 000000000..df42ff514
--- /dev/null
+++ b/src/powerset.rs
@@ -0,0 +1,56 @@
+use std::fmt;
+use alloc::vec::Vec;
+
+use super::combinations::{Combinations, combinations};
+
+/// An iterator to iterate through the powerset of the elements from an iterator.
+///
+/// See [`.powerset()`](../trait.Itertools.html#method.powerset) for more
+/// information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct Powerset<I: Iterator> {
+    combs: Combinations<I>,
+}
+
+impl<I> Clone for Powerset<I>
+    where I: Clone + Iterator,
+          I::Item: Clone,
+{
+    clone_fields!(combs);
+}
+
+impl<I> fmt::Debug for Powerset<I>
+    where I: Iterator + fmt::Debug,
+          I::Item: fmt::Debug,
+{
+    debug_fmt_fields!(Powerset, combs);
+}
+
+/// Create a new `Powerset` from a clonable iterator.
+pub fn powerset<I>(src: I) -> Powerset<I>
+    where I: Iterator,
+          I::Item: Clone,
+{
+    Powerset { combs: combinations(src, 0) }
+}
+
+impl<I> Iterator for Powerset<I>
+    where
+        I: Iterator,
+        I::Item: Clone,
+{
+    type Item = Vec<I::Item>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(elt) = self.combs.next() {
+            Some(elt)
+        } else if self.combs.k() < self.combs.n()
+            || self.combs.k() == 0
+        {
+            self.combs.reset(self.combs.k() + 1);
+            self.combs.next()
+        } else {
+            None
+        }
+    }
+}
diff --git a/tests/quick.rs b/tests/quick.rs
index ff05a478c..4c54fc286 100644
--- a/tests/quick.rs
+++ b/tests/quick.rs
@@ -907,6 +907,13 @@ quickcheck! {
     }
 }
 
+quickcheck! {
+    fn size_powerset(it: Iter<u8, Exact>) -> bool {
+        // Powerset cardinality gets large very quickly, limit input to keep test fast.
+        correct_size_hint(it.take(12).powerset())
+    }
+}
+
 quickcheck! {
     fn size_unique(it: Iter<i8>) -> bool {
         correct_size_hint(it.unique())
diff --git a/tests/test_std.rs b/tests/test_std.rs
index 24d0f0888..a9f9faeb4 100644
--- a/tests/test_std.rs
+++ b/tests/test_std.rs
@@ -764,6 +764,23 @@ fn combinations_with_replacement() {
     );
 }
 
+#[test]
+fn powerset() {
+    it::assert_equal((0..0).powerset(), vec![vec![]]);
+    it::assert_equal((0..1).powerset(), vec![vec![], vec![0]]);
+    it::assert_equal((0..2).powerset(), vec![vec![], vec![0], vec![1], vec![0, 1]]);
+    it::assert_equal((0..3).powerset(), vec![
+        vec![],
+        vec![0], vec![1], vec![2],
+        vec![0, 1], vec![0, 2], vec![1, 2],
+        vec![0, 1, 2]
+    ]);
+
+    assert_eq!((0..4).powerset().count(), 1 << 4);
+    assert_eq!((0..8).powerset().count(), 1 << 8);
+    assert_eq!((0..16).powerset().count(), 1 << 16);
+}
+
 #[test]
 fn diff_mismatch() {
     let a = vec![1, 2, 3, 4];