Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Array combinations #546

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ test = false

[dependencies]
either = { version = "1.0", default-features = false }
array-init = "2.0"

[dev-dependencies]
rand = "0.7"
Expand Down
50 changes: 50 additions & 0 deletions benches/tuple_combinations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,51 @@ fn tuple_comb_c4(c: &mut Criterion) {
});
}


/// Benchmarks walking every 1-element array combination drawn from `0..N1`.
fn array_comb_c1(c: &mut Criterion) {
    c.bench_function("array comb c1", move |bencher| {
        bencher.iter(|| {
            // `black_box` keeps the optimizer from discarding the produced value.
            for [value] in (0..N1).array_combinations() {
                black_box(value);
            }
        })
    });
}


/// Benchmarks walking every 2-element array combination drawn from `0..N2`.
fn array_comb_c2(c: &mut Criterion) {
    c.bench_function("array comb c2", move |bencher| {
        bencher.iter(|| {
            // The sum is fed to `black_box` so both elements count as used.
            for [first, second] in (0..N2).array_combinations() {
                black_box(first + second);
            }
        })
    });
}


/// Benchmarks walking every 3-element array combination drawn from `0..N3`.
fn array_comb_c3(c: &mut Criterion) {
    c.bench_function("array comb c3", move |bencher| {
        bencher.iter(|| {
            // The sum is fed to `black_box` so all three elements count as used.
            for [first, second, third] in (0..N3).array_combinations() {
                black_box(first + second + third);
            }
        })
    });
}


/// Benchmarks walking every 4-element array combination drawn from `0..N4`.
fn array_comb_c4(c: &mut Criterion) {
    c.bench_function("array comb c4", move |bencher| {
        bencher.iter(|| {
            // The sum is fed to `black_box` so all four elements count as used.
            for [first, second, third, fourth] in (0..N4).array_combinations() {
                black_box(first + second + third + fourth);
            }
        })
    });
}


criterion_group!(
benches,
tuple_comb_for1,
Expand All @@ -109,5 +154,10 @@ criterion_group!(
tuple_comb_c2,
tuple_comb_c3,
tuple_comb_c4,
array_comb_c1,
array_comb_c2,
array_comb_c3,
array_comb_c4,
);

criterion_main!(benches);
66 changes: 66 additions & 0 deletions src/adaptors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,72 @@ impl_tuple_combination!(Tuple10Combination Tuple9Combination; a b c d e f g h i)
impl_tuple_combination!(Tuple11Combination Tuple10Combination; a b c d e f g h i j);
impl_tuple_combination!(Tuple12Combination Tuple11Combination; a b c d e f g h i j k);

/// An iterator adaptor that yields every `R`-element combination of the items
/// of the underlying iterator, each as an array `[I::Item; R]`.
///
/// See [`.array_combinations()`](crate::Itertools::array_combinations) for more
/// information.
#[derive(Debug, Clone)]
pub struct ArrayCombinations<I: Iterator, const R: usize> {
// The source iterator; items are pulled from it on demand by `next()`.
iter: I,
// Every item pulled from `iter` so far, kept so it can be cloned into
// the emitted combinations.
buf: Vec<I::Item>,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to use an ArrayVec here, and so avoid heap allocations entirely?

Copy link
Member

@phimuemue phimuemue Sep 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, buf caches the elements from iter, so I think it is impossible to impose an upper bound on the length here.

However, if my suspicion is correct, I wonder whether we should actually use LazyBuffer here - I'm unsure about that.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, quite right.

Actually, my current use-case is to generate pairs of items from an iterator over a large array, so buffering all items is quite costly. In this case my iterator implements Clone, so I can instead hold R clones of the iterator and entirely avoid dynamic/heap allocations:

/// Sketch of an alternative `ArrayCombinations` that keeps `R` peekable
/// clones of the source iterator rather than a buffer of its items.
struct ArrayCombinations<I: Iterator, const R: usize> {
    /// Each element of this array is an iterator that can be inspected for
    /// the corresponding element of the resulting combo.
    ///
    /// Peeking each iterator yields the last emitted value, such that
    /// invoking `next()` first advances the state before emitting the
    /// result. For example, given `[1,2,3,4,5].array_combinations::<3>()`,
    /// the initial state of `iters` is such that peeking each iterator element
    /// will yield `[1, 2, 2]` (becoming `[1, 2, 3]` after `next()` is called).
    iters: [Peekable<I>; R],
}

impl<I, const R: usize> ArrayCombinations<I, R>
where
    I: Iterator + Clone,
    I::Item: Clone,
{
    /// Builds the initial state: `R` peekable clones of `iter`, staggered so
    /// that the first call to `next()` produces the first combination.
    pub fn new(iter: I) -> Self {
        let mut iters = ArrayVec::new();
        // NOTE(review): this push is unconditional, so `R == 0` would push into
        // a zero-capacity `ArrayVec` — presumably a panic; confirm.
        iters.push(iter.peekable());
        for idx in 1..R {
            let last = iters.last_mut().unwrap();
            // The second slot is an un-advanced clone of the first; from the
            // third slot on, the previous slot is stepped forward once before
            // being cloned, so each slot starts one item further along.
            if idx > 1 {
                last.next();
            }
            let last = last.clone();
            iters.push(last);
        }
        // Exactly `R` iterators have been pushed at this point (for `R >= 1`),
        // so the conversion to a fixed-size array is expected to succeed.
        Self { iters: iters.into_inner().ok().unwrap() }
    }
}

impl<I, const R: usize> Iterator for ArrayCombinations<I, R>
where
    I: Iterator + Clone,
    I::Item: Clone,
{
    type Item = [I::Item; R];

    /// Advances the iterator state to the next combination and returns the
    /// items currently at the front of each iterator, or `None` once any of
    /// them is exhausted.
    ///
    /// NOTE(review): with `R == 1` the single iterator is advanced before its
    /// first item was ever emitted, so the first source item appears to be
    /// skipped — confirm.
    fn next(&mut self) -> Option<Self::Item> {
        // Walk the positions from last to first, looking for one that can
        // still be advanced while leaving enough items for every later position.
        'search: for idx in (0..R).rev() {
            self.iters[idx].next();
            if self.iters[idx].peek().is_some() {
                // Re-seed every later position: each one becomes a clone of
                // the previous position stepped forward by one item.
                let mut clone = self.iters[idx].clone();
                for reset in self.iters[idx+1..].iter_mut() {
                    clone.next();
                    match clone.peek() {
                        Some(_) => *reset = clone.clone(),
                        // Not enough items left after `idx`; try advancing
                        // the position before it instead.
                        None => continue 'search,
                    }
                }
                break;
            }
        }

        // Clone the front item of every iterator into an array. If any
        // iterator has run dry, `collect` yields `None` and iteration ends.
        self.iters
            .iter_mut()
            .map(Peekable::peek)
            .map(Option::<&_>::cloned)
            .collect::<Option<ArrayVec<_, R>>>()
            .map(ArrayVec::into_inner)
            .map(Result::ok)
            .flatten()
    }
}

// `indices[i]` is the position in `buf` of the `i`-th element of the
// current combination.
indices: [usize; R],
}

impl<I: Iterator, const R: usize> ArrayCombinations<I, R> {
    /// Wraps `iter` in an `ArrayCombinations` adaptor.
    ///
    /// Nothing is pulled from `iter` yet: the buffer starts empty and the
    /// index array starts as `[0, 1, .., R - 1]`.
    pub fn new(iter: I) -> Self {
        Self {
            iter,
            buf: Vec::new(),
            indices: array_init::array_init(|position| position),
        }
    }
}

impl<I: Iterator, const R: usize> Iterator for ArrayCombinations<I, R>
where
    I::Item: Clone,
{
    type Item = [I::Item; R];

    /// Returns the next `R`-element combination, cloning the chosen items out
    /// of the internal buffer, or `None` when the source cannot supply a new
    /// combination.
    ///
    /// Items are pulled from the source lazily. After the initial fill, the
    /// last index always points at the newest buffered item while the first
    /// `R - 1` indices enumerate the combinations of the older items; once
    /// those are used up, one more item is pulled from the source.
    ///
    /// Calling `next()` again after `None` never panics: it simply asks the
    /// source for another item again.
    fn next(&mut self) -> Option<Self::Item> {
        let len = self.buf.len();

        if len < R {
            // Initial fill. If the source runs dry before `R` items are
            // buffered there is not even one complete combination, so bail.
            // Keying this branch on `len < R` (rather than on an empty buffer)
            // means a partially filled buffer is never indexed on later calls.
            for _ in len..R {
                self.buf.push(self.iter.next()?);
            }
        } else if R > 0 && self.indices[0] + R == len {
            // The first index is as close to the end as possible
            // eg: [0, 1, 2, 3, 4, 5]
            //               ^  ^  ^
            // so every combination of the buffered items has been emitted.
            // Try to get one more item; if there is none, we are done.
            self.buf.push(self.iter.next()?);

            // Restart the leading indices and pin the last one to the new item.
            for (i, index) in self.indices[..R - 1].iter_mut().enumerate() {
                *index = i;
            }
            self.indices[R - 1] += 1;
        } else if R > 1 {
            // Advance the rightmost of the first `R - 1` indices that has not
            // yet reached its maximum, then reset the ones after it. The last
            // index stays pinned to the newest item.
            let mut i = R - 2;
            while i > 0 && self.indices[i] + R == len + i {
                i -= 1;
            }

            self.indices[i] += 1;
            for j in i + 1..R - 1 {
                self.indices[j] = self.indices[j - 1] + 1;
            }
        }
        // NOTE: with `R == 0` none of the branches above applies and there is
        // no state to advance, so the empty combination is returned on every
        // call (unchanged behaviour; yielding it exactly once would need an
        // extra state field in the struct).

        Some(array_init::array_init(|i| self.buf[self.indices[i]].clone()))
    }
}

/// An iterator adapter to filter values within a nested `Result::Ok`.
///
/// See [`.filter_ok()`](crate::Itertools::filter_ok) for more information.
Expand Down
40 changes: 40 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ pub mod structs {
WhileSome,
Coalesce,
TupleCombinations,
ArrayCombinations,
Positions,
Update,
};
Expand Down Expand Up @@ -1449,6 +1450,45 @@ pub trait Itertools : Iterator {
adaptors::tuple_combinations(self)
}

/// Return an iterator adaptor that iterates over all `R`-element combinations
/// of the elements from an iterator, yielding each one as an array.
///
/// The iterator element type is `[Self::Item; R]`. `R` can be inferred from
/// how the arrays are used, or given explicitly.
///
/// ```
/// use itertools::Itertools;
///
/// let mut v = Vec::new();
/// for [a, b] in (1..5).array_combinations() {
/// v.push((a, b));
/// }
/// assert_eq!(v, vec![(1, 2), (1, 3), (2, 3), (1, 4), (2, 4), (3, 4)]);
///
/// let mut it = (1..5).array_combinations();
/// assert_eq!(Some([1, 2, 3]), it.next());
/// assert_eq!(Some([1, 2, 4]), it.next());
/// assert_eq!(Some([1, 3, 4]), it.next());
/// assert_eq!(Some([2, 3, 4]), it.next());
/// assert_eq!(None, it.next());
///
/// // this requires a type hint
/// let it = (1..5).array_combinations::<3>();
/// itertools::assert_equal(it, vec![[1, 2, 3], [1, 2, 4], [1, 3, 4], [2, 3, 4]]);
///
/// // you can also specify the complete type
/// use itertools::ArrayCombinations;
/// use std::ops::RangeFrom;
///
/// let it: ArrayCombinations<RangeFrom<u32>, 2> = (1..).array_combinations();
/// itertools::assert_equal(it.take(6), vec![[1, 2], [1, 3], [2, 3], [1, 4], [2, 4], [3, 4]]);
/// ```
fn array_combinations<const R: usize>(self) -> ArrayCombinations<Self, R>
where Self: Sized,
Self::Item: Clone,
{
// All of the work happens lazily in the returned adaptor.
ArrayCombinations::new(self)
}

/// Return an iterator adaptor that iterates over the `k`-length combinations of
/// the elements from an iterator.
///
Expand Down
16 changes: 16 additions & 0 deletions tests/quick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,22 @@ quickcheck! {
}
}

quickcheck! {
    // Property: 2-element array combinations come out as every pair
    // `[values[i], values[j]]` with `i < j`, grouped by increasing `j`,
    // and the adaptor is exhausted right after the last pair.
    fn equal_combinations_array(it: Iter<i16>) -> bool {
        let values = it.clone().collect_vec();
        let mut cmb = it.array_combinations();

        let index_pairs = (1..values.len()).flat_map(|hi| (0..hi).map(move |lo| (lo, hi)));
        for (lo, hi) in index_pairs {
            let expected = [values[lo], values[hi]];
            if cmb.next().unwrap() != expected {
                return false;
            }
        }

        cmb.next().is_none()
    }
}

quickcheck! {
fn size_pad_tail(it: Iter<i8>, pad: u8) -> bool {
correct_size_hint(it.clone().pad_using(pad as usize, |_| 0)) &&
Expand Down