Skip to content

Commit

Permalink
FEAT: Powerset iterator adaptor
Browse files Browse the repository at this point in the history
An iterator adaptor that produces vectors containing all subsets of
the input iterator's elements.
  • Loading branch information
willcrozi committed Mar 1, 2019
1 parent 2a092df commit f9a0c4e
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 2 deletions.
74 changes: 74 additions & 0 deletions benches/powerset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#![feature(test)]

extern crate test;
extern crate itertools;

use test::{black_box, Bencher};
use itertools::Itertools;

// Every benchmark generates the same aggregate number of subsets, whatever the input length.
const TOTAL_ELEMENTS: usize = 1 << 16;

/// Number of complete powerset traversals needed for an input of length `n`
/// so that `TOTAL_ELEMENTS` subsets are generated overall.
const fn calc_iters(n: usize) -> usize {
    // A set of `n` elements has 2^n subsets.
    let subsets_per_pass: usize = 1 << n;
    TOTAL_ELEMENTS / subsets_per_pass
}

/// Benchmark body shared by all `pset_*` benches: repeatedly walks the whole
/// powerset of `0..n`, passing every generated subset through `black_box`.
fn pset_n(b: &mut Bencher, n: usize) {
    b.iter(|| {
        let passes = calc_iters(n);
        for _ in 0..passes {
            (0..n).powerset().for_each(|subset| {
                black_box(subset);
            });
        }
    });
}

#[bench]
fn pset_00(b: &mut Bencher) { pset_n(b, 0); }

#[bench]
fn pset_01(b: &mut Bencher) { pset_n(b, 1); }

#[bench]
fn pset_02(b: &mut Bencher) { pset_n(b, 2); }

#[bench]
fn pset_03(b: &mut Bencher) { pset_n(b, 3); }

#[bench]
fn pset_04(b: &mut Bencher) { pset_n(b, 4); }

#[bench]
fn pset_05(b: &mut Bencher) { pset_n(b, 5); }

#[bench]
fn pset_06(b: &mut Bencher) { pset_n(b, 6); }

#[bench]
fn pset_07(b: &mut Bencher) { pset_n(b, 7); }

#[bench]
fn pset_08(b: &mut Bencher) { pset_n(b, 8); }

#[bench]
fn pset_09(b: &mut Bencher) { pset_n(b, 9); }

#[bench]
fn pset_10(b: &mut Bencher) { pset_n(b, 10); }

#[bench]
fn pset_11(b: &mut Bencher) { pset_n(b, 11); }

#[bench]
fn pset_12(b: &mut Bencher) { pset_n(b, 12); }

#[bench]
fn pset_13(b: &mut Bencher) { pset_n(b, 13); }

#[bench]
fn pset_14(b: &mut Bencher) { pset_n(b, 14); }

#[bench]
fn pset_15(b: &mut Bencher) { pset_n(b, 15); }

#[bench]
fn pset_16(b: &mut Bencher) { pset_n(b, 16); }
7 changes: 6 additions & 1 deletion src/combinations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ pub fn combinations<I>(iter: I, n: usize) -> Combinations<I>
}
}

impl<I: Iterator> Combinations<I> {
    /// Returns the number of elements in each combination produced by this iterator.
    #[inline]
    pub fn n(&self) -> usize {
        self.n
    }
}

impl<I> Iterator for Combinations<I>
where I: Iterator,
I::Item: Clone
Expand Down Expand Up @@ -162,4 +168,3 @@ impl<I> Index<usize> for LazyBuffer<I>
self.buffer.index(_index)
}
}

38 changes: 38 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ pub mod structs {
pub use multipeek_impl::MultiPeek;
pub use pad_tail::PadUsing;
pub use peeking_take_while::PeekingTakeWhile;
#[cfg(feature = "use_std")]
pub use powerset::Powerset;
pub use process_results_impl::ProcessResults;
#[cfg(feature = "use_std")]
pub use put_back_n_impl::PutBackN;
Expand Down Expand Up @@ -175,6 +177,8 @@ mod minmax;
mod multipeek_impl;
mod pad_tail;
mod peeking_take_while;
#[cfg(feature = "use_std")]
mod powerset;
mod process_results_impl;
#[cfg(feature = "use_std")]
mod put_back_n_impl;
Expand Down Expand Up @@ -1131,6 +1135,40 @@ pub trait Itertools : Iterator {
combinations::combinations(self, n)
}

/// Return an iterator adaptor that iterates over the powerset of the elements from its source
/// iterator.
///
/// Iterator element type is `Vec<Self::Item>`. The iterator produces a new `Vec` per iteration,
/// and clones the iterator elements.
///
/// The powerset of a set contains all subsets including the empty set and the full input set.
/// A powerset has length _2^n_ where _n_ is the length of the input set.
///
/// Subsets are produced in order of increasing length: the empty set first, then every
/// single-element subset, then the subsets of length two, and so on up to the full input set.
///
/// Each `Vec` returned from this iterator represents a subset of the elements in the source
/// iterator. Within each `Vec` the elements are stable in their ordering with regard to the
/// source iterator. That is, any two elements appearing in the source iterator, if present
/// in a returned `Vec` from this iterator, will have the same relative ordering.
///
/// ```
/// use itertools::Itertools;
///
/// let sets = (1..4).powerset().collect::<Vec<_>>();
/// itertools::assert_equal(sets, vec![
/// vec![],
/// vec![1],
/// vec![2],
/// vec![3],
/// vec![1, 2],
/// vec![1, 3],
/// vec![2, 3],
/// vec![1, 2, 3],
/// ]);
/// ```
#[cfg(feature = "use_std")]
fn powerset(self) -> Powerset<Self>
where Self: Sized,
Self::Item: Clone { powerset::powerset(self) }

/// Return an iterator adaptor that pads the sequence to a minimum length of
/// `min` by filling missing elements using a closure `f`.
///
Expand Down
132 changes: 132 additions & 0 deletions src/powerset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
use std::vec::IntoIter;
use std::fmt;
use std::usize;
use super::combinations::{self, Combinations};
use super::size_hint;

/// An iterator over the powerset of all elements from its source iterator.
///
/// Subsets are yielded as `Vec`s in order of increasing length: first the empty set, then one
/// single-element subset per source element, then combinations of two or more elements.
///
/// See [`.powerset()`](../trait.Itertools.html#method.powerset) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct Powerset<I: Iterator> {
// Source iterator; one element is pulled from it for each single-element subset yielded.
src: I,
// Clones of every element pulled from `src` so far; the input for later combinations.
buf: Vec<I::Item>,
// Only used once a source containing two or more items is exhausted.
combs: Option<SetCombinations<I>>,
// Number of subsets yielded so far (saturating). Used for detecting special cases and
// for the size_hint() calculation.
pos: usize,
}

// `Combinations` adaptor running over an owned `Vec` iterator of the source's item type.
type SetCombinations<I> = Combinations<IntoIter<<I as Iterator>::Item>>;

// `Debug` is only available when both the source iterator and its items implement `Debug`.
impl<I> fmt::Debug for Powerset<I>
where I: Iterator + fmt::Debug,
I::Item: fmt::Debug,
{
// NOTE(review): crate-local macro; presumably expands to a `fmt` method that prints the
// listed fields — confirm against its definition.
debug_fmt_fields!(Powerset, src, buf, combs, pos);
}

/// Create a new `Powerset` adaptor over the iterator `src`.
///
/// The source iterator itself does not have to be cloneable; only its items
/// must implement `Clone`.
pub fn powerset<I>(src: I) -> Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    // Pre-size the element buffer from the source's lower size bound.
    let capacity = src.size_hint().0;
    let buf = Vec::with_capacity(capacity);

    Powerset {
        src,
        buf,
        combs: None,
        pos: 0,
    }
}

impl<I> Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    /// Starts the next round of combinations over the buffered elements and
    /// stores it in `self.combs`.
    ///
    /// Returns the first subset of the new round, or `None` if that round
    /// yields nothing.
    #[inline]
    fn next_from_new_combs(&mut self) -> Option<Vec<I::Item>> {
        debug_assert!(self.buf.len() > 1);

        // Each round is one element longer than the previous one; the very
        // first round produces pairs.
        let subset_len = self.combs.as_ref().map_or(2, |prev| prev.n() + 1);

        let elements = self.buf.clone().into_iter();
        let mut round = combinations::combinations(elements, subset_len);
        let first = round.next();
        self.combs = Some(round);

        first
    }
}

impl<I> Iterator for Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = Vec<I::Item>;

    /// Yields the next subset: the empty set first, then one single-element
    /// subset per source element, then combinations of increasing length
    /// built from the buffered elements.
    fn next(&mut self) -> Option<Vec<I::Item>> {
        // Set when the current stage ran dry and a longer round of
        // combinations has to be started.
        let mut start_next_round = false;

        let mut subset = match &mut self.combs {
            // Nothing yielded yet: begin with the empty set.
            None if self.pos == 0 => Some(Vec::new()),
            // Source not yet exhausted: buffer a clone, yield the singleton.
            None => match self.src.next() {
                Some(elt) => {
                    self.buf.push(elt.clone());
                    Some(vec![elt])
                }
                None => {
                    start_next_round = self.buf.len() >= 2;
                    None
                }
            },
            // A round shorter than the full set is in progress.
            Some(combs) if combs.n() < self.buf.len() => {
                let item = combs.next();
                if item.is_none() {
                    start_next_round = self.buf.len() >= 2;
                }
                item
            }
            // The full-length round has been started: iteration is done.
            Some(_) => None,
        };

        // Kept outside the match because `self.combs` is mutably borrowed there;
        // merge into the relevant arms once NLLs are available.
        if start_next_round {
            subset = self.next_from_new_combs();
        }
        if subset.is_some() {
            self.pos = self.pos.saturating_add(1);
        }

        subset
    }

    /// Bounds on the number of subsets still to be yielded:
    /// `2 ^ (set length)` minus the count already produced.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Bounds on the full set length: elements still in the source plus those buffered.
        let set_len = size_hint::add_scalar(self.src.size_hint(), self.buf.len());

        // Bounds on the size of the whole powerset.
        let total = size_hint::two_raised_pow(set_len);

        if self.pos == usize::MAX {
            // `self.pos` is saturated and can no longer be trusted.
            (0, total.1)
        } else {
            // Remove the subsets that were already yielded.
            size_hint::sub_scalar(total, self.pos)
        }
    }
}
24 changes: 23 additions & 1 deletion src/size_hint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ pub fn sub_scalar(sh: SizeHint, x: usize) -> SizeHint {
(low, hi)
}


/// Multiply **SizeHint** correctly
///
/// ```ignore
Expand Down Expand Up @@ -74,6 +73,29 @@ pub fn mul_scalar(sh: SizeHint, x: usize) -> SizeHint {
(low, hi)
}

/// Raise two to the power of a **SizeHint** exponent.
///
/// The lower bound saturates at `usize::MAX` when the result does not fit in
/// a `usize`; the upper bound becomes `None` in that case, or when the
/// exponent has no upper bound.
#[inline]
pub fn two_raised_pow(exp: SizeHint) -> SizeHint {
    // Largest shift for which `1 << shift` is still representable in a `usize`.
    let max_shift = 1_usize.leading_zeros() as usize;

    let (exp_low, exp_hi) = exp;

    // Saturating 'raise two by exponent'.
    let low = if exp_low > max_shift {
        usize::MAX
    } else {
        1_usize << exp_low
    };

    // Checked 'raise two by exponent'.
    let hi = exp_hi.and_then(|e| {
        if e > max_shift {
            None
        } else {
            Some(1_usize << e)
        }
    });

    (low, hi)
}

/// Return the maximum
#[inline]
pub fn max(a: SizeHint, b: SizeHint) -> SizeHint {
Expand Down
7 changes: 7 additions & 0 deletions tests/quick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,13 @@ quickcheck! {
}
}

quickcheck! {
// Property: the powerset of an arbitrary exact-size input must pass `correct_size_hint`.
fn size_powerset(it: Iter<u8, Exact>) -> bool {
// Powerset cardinality gets large very quickly, so limit the input to keep the test fast:
// at most 12 elements means at most 2^12 = 4096 subsets.
correct_size_hint(it.take(12).powerset())
}
}

quickcheck! {
fn size_pad_tail(it: Iter<i8>, pad: u8) -> bool {
correct_size_hint(it.clone().pad_using(pad as usize, |_| 0)) &&
Expand Down
17 changes: 17 additions & 0 deletions tests/test_std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,23 @@ fn combinations_zero() {
it::assert_equal((1..3).combinations(0), vec![vec![]]);
}

#[test]
fn powerset() {
    // Exact contents and ordering for the smallest inputs.
    it::assert_equal((0..0).powerset(), vec![vec![]]);
    it::assert_equal((0..1).powerset(), vec![vec![], vec![0]]);
    it::assert_equal(
        (0..2).powerset(),
        vec![vec![], vec![0], vec![1], vec![0, 1]],
    );
    it::assert_equal(
        (0..3).powerset(),
        vec![
            vec![],
            vec![0],
            vec![1],
            vec![2],
            vec![0, 1],
            vec![0, 2],
            vec![1, 2],
            vec![0, 1, 2],
        ],
    );

    // Larger inputs: only check that the number of subsets is 2^n.
    for &n in &[4_i32, 8, 16] {
        assert_eq!((0..n).powerset().count(), 1_usize << n);
    }
}

#[test]
fn diff_mismatch() {
let a = vec![1, 2, 3, 4];
Expand Down

0 comments on commit f9a0c4e

Please sign in to comment.