Skip to content

Commit

Permalink
FEAT: Powerset iterator adaptor
Browse files Browse the repository at this point in the history
An iterator adapter that produces vectors containing all subsets of
input iterator elements.
  • Loading branch information
willcrozi committed Sep 23, 2020
1 parent 56dfb01 commit 73b3a70
Show file tree
Hide file tree
Showing 7 changed files with 321 additions and 15 deletions.
68 changes: 58 additions & 10 deletions src/combinations.rs
Expand Up @@ -27,22 +27,70 @@ impl<I> fmt::Debug for Combinations<I>
debug_fmt_fields!(Combinations, indices, pool, first);
}

/// Create a new `Combinations` from a clonable iterator.
/// Create a new `Combinations` from a cloneable iterator.
pub fn combinations<I>(iter: I, k: usize) -> Combinations<I>
where I: Iterator
{
let mut pool: LazyBuffer<I> = LazyBuffer::new(iter);
let mut pool = LazyBuffer::new(iter);
pool.prefill(k);

for _ in 0..k {
if !pool.get_next() {
break;
Combinations::from_pool(pool, k)
}

impl<I: Iterator> Combinations<I> {
/// Builds a `Combinations` of length `k` on top of an already constructed pool.
///
/// The index vector starts out as `0, 1, ..., k - 1` and the iterator is
/// marked as not having yielded anything yet. The pool is stored as given;
/// this constructor does not pull any elements into it.
#[inline]
pub(crate) fn from_pool(pool: LazyBuffer<I>, k: usize) -> Self {
    let indices = (0..k).collect();
    Combinations { indices, pool, first: true }
}

Combinations {
indices: (0..k).collect(),
pool,
first: true,
/// Returns the length of a combination produced by this iterator.
///
/// This is the number of entries currently held in the index vector.
#[inline]
pub fn k(&self) -> usize { self.indices.len() }

/// Returns the (current) length of the pool from which combination elements are
/// selected. This value can change between invocations of `next()` and `reset()`,
/// since the pool is filled lazily from the source iterator.
#[inline]
pub(crate) fn n(&self) -> usize { self.pool.len() }

/// Returns a reference to the source iterator.
///
/// This is the iterator owned by the pool (`pool.it`), i.e. whatever part of
/// the source has not yet been pulled into the buffer.
#[inline]
pub(crate) fn src(&self) -> &I { &self.pool.it }

/// Rewinds this `Combinations` so that it yields combinations of length `k`
/// over the same pool, starting again from the first combination.
///
/// When `k` is at least the previous combination length, the pool is asked to
/// buffer `k` elements and the outcome of that attempt is returned. When `k`
/// is smaller than the previous length the pool is left untouched and `true`
/// is returned.
///
/// Returns `true` if the underlying pool is large enough to provide
/// combinations of length `k`, otherwise `false`.
pub(crate) fn reset(&mut self, k: usize) -> bool {
    // Shrinking never needs more pool elements; only a same-or-larger `k`
    // triggers a prefill.
    let filled = k < self.indices.len() || self.pool.prefill(k);

    // Rewrite the index vector as the identity sequence 0, 1, ..., k - 1.
    self.indices.clear();
    self.indices.extend(0..k);

    self.first = true;
    filled
}
}

Expand All @@ -53,7 +101,7 @@ impl<I> Iterator for Combinations<I>
type Item = Vec<I::Item>;
fn next(&mut self) -> Option<Self::Item> {
if self.first {
if self.pool.is_done() {
if self.pool.is_done() && self.k() > self.n() {
return None;
}
self.first = false;
Expand Down
29 changes: 24 additions & 5 deletions src/lazy_buffer.rs
Expand Up @@ -13,11 +13,11 @@ where
I: Iterator,
{
pub fn new(it: I) -> LazyBuffer<I> {
LazyBuffer {
it,
done: false,
buffer: Vec::new(),
}
LazyBuffer::from_parts(it, false, Vec::new())
}

/// Assembles a `LazyBuffer` directly from its three fields: the source
/// iterator `it`, the `done` flag and the already buffered elements.
///
/// No validation is performed; the caller is responsible for passing a
/// `done` flag that matches the state of `it`.
pub(crate) fn from_parts(it: I, done: bool, buffer: Vec<I::Item>) -> LazyBuffer<I> {
LazyBuffer { it, done, buffer }
}

pub fn len(&self) -> usize {
Expand All @@ -44,6 +44,25 @@ where
}
}
}

/// Pulls elements from the source iterator until the buffer holds at least
/// `len` elements or the source runs dry.
///
/// If the source yields `None` while filling, the buffer is marked as done.
/// A buffer that is already done is never polled again.
///
/// Returns `true` if the buffer holds at least `len` elements afterwards,
/// otherwise `false`.
pub fn prefill(&mut self, len: usize) -> bool {
    while !self.done && self.buffer.len() < len {
        match self.it.next() {
            Some(elt) => self.buffer.push(elt),
            None => self.done = true,
        }
    }
    self.buffer.len() >= len
}
}

impl<I, J> Index<J> for LazyBuffer<I>
Expand Down
41 changes: 41 additions & 0 deletions src/lib.rs
Expand Up @@ -134,6 +134,8 @@ pub mod structs {
pub use crate::permutations::Permutations;
pub use crate::process_results_impl::ProcessResults;
#[cfg(feature = "use_alloc")]
pub use crate::powerset::Powerset;
#[cfg(feature = "use_alloc")]
pub use crate::put_back_n_impl::PutBackN;
#[cfg(feature = "use_alloc")]
pub use crate::rciter_impl::RcIter;
Expand Down Expand Up @@ -207,6 +209,8 @@ mod peek_nth;
mod peeking_take_while;
#[cfg(feature = "use_alloc")]
mod permutations;
#[cfg(feature = "use_alloc")]
mod powerset;
mod process_results_impl;
#[cfg(feature = "use_alloc")]
mod put_back_n_impl;
Expand Down Expand Up @@ -1406,6 +1410,43 @@ pub trait Itertools : Iterator {
permutations::permutations(self, k)
}

/// An iterator adaptor returning an iterator over the powerset of all elements from its source
/// iterator.
///
/// Iterator element type is `Vec<Self::Item>`. The iterator produces a new Vec per iteration,
/// and clones the iterator elements.
///
/// The powerset of a set contains all subsets including the empty set and the full input set.
/// A powerset has length _2^n_ where _n_ is the length of the input set.
///
/// Subsets are yielded in order of non-decreasing length: the empty set first, then the
/// single-element subsets (in source order), then the subsets of length two, and so on up to
/// the full input set.
///
/// Each Vec returned from this iterator represents a subset of the elements in the source
/// iterator. Within each Vec the elements are stable in their ordering with regard to the
/// source iterator. That is, any two elements appearing in the source iterator, if present
/// in a returned Vec from this iterator, will have the same relative ordering.
///
/// ```
/// use itertools::Itertools;
///
/// let sets = (1..4).powerset().collect::<Vec<_>>();
/// itertools::assert_equal(sets, vec![
/// vec![],
/// vec![1],
/// vec![2],
/// vec![3],
/// vec![1, 2],
/// vec![1, 3],
/// vec![2, 3],
/// vec![1, 2, 3],
/// ]);
/// ```
#[cfg(feature = "use_alloc")]
fn powerset(self) -> Powerset<Self>
where Self: Sized,
Self::Item: Clone,
{
powerset::powerset(self)
}

/// Return an iterator adaptor that pads the sequence to a minimum length of
/// `min` by filling missing elements using a closure `f`.
///
Expand Down
149 changes: 149 additions & 0 deletions src/powerset.rs
@@ -0,0 +1,149 @@
use std::{fmt, mem};
use std::usize;
use alloc::vec::Vec;

use super::combinations::Combinations;
use super::lazy_buffer::LazyBuffer;
use super::size_hint;

/// An iterator over the powerset of all elements from its source iterator.
///
/// See [`.powerset()`](../trait.Itertools.html#method.powerset) for more
/// information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct Powerset<I: Iterator> {
// Current mode of operation: buffering source elements or yielding combinations.
inner: Inner<I>,
// Position from start of iteration, used for first element and size-hint.
// Incremented with saturating arithmetic, so `usize::MAX` means "unknown".
pos: usize,
}

// `Clone` is only available when both the source iterator and its items are
// cloneable. The body is generated by the crate's `clone_fields!` macro
// (defined elsewhere; presumably clones each listed field).
impl<I> Clone for Powerset<I>
where I: Clone + Iterator,
I::Item: Clone,
{
clone_fields!(inner, pos);
}

// `Debug` requires both the source iterator and its items to be `Debug`.
// The body is generated by the crate's `debug_fmt_fields!` macro (defined
// elsewhere; presumably formats the listed fields under the given type name).
impl<I> fmt::Debug for Powerset<I>
where I: Iterator + fmt::Debug,
I::Item: fmt::Debug,
{
debug_fmt_fields!(Powerset, inner, pos);
}

/// Create a new `Powerset` over the elements yielded by `src`.
///
/// The source iterator itself does not have to be cloneable; only its items
/// must implement `Clone`. The returned iterator starts in buffering mode with
/// an empty buffer and has not yet pulled anything from `src`.
pub fn powerset<I>(src: I) -> Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    let buffered = Vec::new();
    Powerset { inner: Inner::Buffer(src, buffered), pos: 0 }
}

// Represents the inner state of a `Powerset` iterator.
//
// A `Powerset` starts in `Buffer` and is switched to `Combs` by
// `Powerset::upgrade_and_get_next` once the source iterator is exhausted and
// more than one element has been buffered.
#[derive(Clone, Debug)]
enum Inner<I: Iterator> {
// Buffering mode, source iterator still yielding elements.
// Holds the source and a clone of every element pulled from it so far.
Buffer(I, Vec<I::Item>),
// Combination mode, yielding combinations of buffered elements.
Combs(Combinations<I>),
// Temporary state for in-place switching of Buffer->Combs.
// Only present while the previous state has been moved out with
// `mem::replace`; `next` and `size_hint` treat it as unreachable.
Empty_,
}

impl<I> Iterator for Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = Vec<I::Item>;

    // Yields the next subset: first the empty set, then one single-element
    // subset per source element (buffering a clone of each), then all
    // combinations of length 2, 3, ... of the buffered elements.
    fn next(&mut self) -> Option<Vec<I::Item>> {
        let subset = match &mut self.inner {
            Inner::Empty_ => unreachable!(),
            // Combination mode: drain the current length, then move on to the
            // next longer one while the pool can still supply it.
            Inner::Combs(combs) => match combs.next() {
                Some(subset) => Some(subset),
                None => {
                    let can_grow = combs.k() < combs.n();
                    if !(can_grow && combs.reset(combs.k() + 1)) {
                        return None;
                    }
                    combs.next()
                }
            },
            // Buffering mode: the source has not been seen to run dry yet.
            Inner::Buffer(src, buf) => {
                if self.pos == 0 {
                    // The very first subset is always the empty set.
                    Some(Vec::new())
                } else {
                    match src.next() {
                        Some(elt) => {
                            // Keep a clone for later combinations and yield
                            // the element itself as a one-element subset.
                            buf.push(elt.clone());
                            Some(alloc::vec![elt])
                        }
                        // Source exhausted with at least two elements seen:
                        // continue with combinations of the buffered elements.
                        None if buf.len() > 1 => self.upgrade_and_get_next(),
                        // Zero or one element: every subset was already yielded.
                        None => return None,
                    }
                }
            }
        };

        debug_assert!(subset.is_some());
        self.pos = self.pos.saturating_add(1);
        subset
    }

    // Derives the bounds from `2 ^ (elements buffered + elements the source
    // may still yield)` minus the number of subsets yielded so far.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Remaining-source bounds and the number of elements already buffered.
        let (src_hint, buffered) = match &self.inner {
            Inner::Buffer(src, buf) => (src.size_hint(), buf.len()),
            Inner::Combs(combs) => (combs.src().size_hint(), combs.n()),
            Inner::Empty_ => unreachable!(),
        };

        // Bounds on the total number of subsets this iterator yields overall.
        let total = size_hint::two_exp(size_hint::add_scalar(src_hint, buffered));

        if self.pos == usize::MAX {
            // `pos` has saturated and no longer counts yielded subsets
            // reliably, so only the upper bound can be reported.
            return (0, total.1);
        }

        // Subtract the subsets that were already yielded.
        size_hint::sub_scalar(total, self.pos)
    }
}

impl<I> Powerset<I>
where
    I: Iterator,
    I::Item: Clone,
{
    // Switches the inner state from `Inner::Buffer` to `Inner::Combs` and
    // returns the first combination of length two over the buffered elements.
    //
    // Must only be called while `self.inner` is `Inner::Buffer`. In any other
    // state `None` is returned and `self.inner` is left as `Inner::Empty_`.
    #[inline]
    fn upgrade_and_get_next(&mut self) -> Option<Vec<I::Item>> {
        // Move the current state out, leaving a placeholder behind so that the
        // source iterator and buffer can be reused without cloning.
        match mem::replace(&mut self.inner, Inner::Empty_) {
            Inner::Buffer(src, buf) => {
                // The pool is created with its `done` flag set to `true`.
                let pool = LazyBuffer::from_parts(src, true, buf);
                let mut combs = Combinations::from_pool(pool, 2);

                let first = combs.next();
                self.inner = Inner::Combs(combs);
                first
            }
            _ => None,
        }
    }
}
25 changes: 25 additions & 0 deletions src/size_hint.rs
Expand Up @@ -74,6 +74,31 @@ pub fn mul_scalar(sh: SizeHint, x: usize) -> SizeHint {
(low, hi)
}

/// Raise two to the power of each bound of a **SizeHint**.
///
/// The lower bound saturates at `usize::MAX` when `2 ^ low` does not fit in a
/// `usize`. The upper bound becomes `None` when it is unknown or when
/// `2 ^ hi` does not fit in a `usize`.
#[inline]
#[allow(dead_code)]
pub fn two_exp(exp: SizeHint) -> SizeHint {
    // Largest shift for which `1 << shift` still fits in a `usize`.
    let max_shift = 1_usize.leading_zeros() as usize;
    let pow2 = |shift: usize| -> Option<usize> {
        if shift <= max_shift {
            Some(1 << shift)
        } else {
            None
        }
    };

    let (low, hi) = exp;
    (pow2(low).unwrap_or(usize::MAX), hi.and_then(pow2))
}


/// Return the maximum
#[inline]
pub fn max(a: SizeHint, b: SizeHint) -> SizeHint {
Expand Down
7 changes: 7 additions & 0 deletions tests/quick.rs
Expand Up @@ -907,6 +907,13 @@ quickcheck! {
}
}

quickcheck! {
// Property: the size hint reported by `powerset()` passes `correct_size_hint`
// (helper defined elsewhere in this test file) for arbitrary exact-size inputs.
fn size_powerset(it: Iter<u8, Exact>) -> bool {
// Powerset cardinality gets large very quickly, limit input to keep test fast
// (at most 12 source elements, i.e. at most 2^12 = 4096 subsets).
correct_size_hint(it.take(12).powerset())
}
}

quickcheck! {
fn size_unique(it: Iter<i8>) -> bool {
correct_size_hint(it.unique())
Expand Down

0 comments on commit 73b3a70

Please sign in to comment.