Skip to content

Commit

Permalink
Merge #423
Browse files Browse the repository at this point in the history
423: Added `Itertools::dedup_with_count()` and `Itertools::dedup_by_with_count()` r=jswrenn a=orium

Fixes #393.

Co-authored-by: Diogo Sousa <diogogsousa@gmail.com>
  • Loading branch information
bors[bot] and orium committed May 18, 2020
2 parents d081998 + 2508d87 commit 2d22ff9
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 12 deletions.
93 changes: 83 additions & 10 deletions src/adaptors/mod.rs
Expand Up @@ -10,7 +10,7 @@ pub use self::multi_product::*;

use std::fmt;
use std::mem::replace;
use std::iter::{Fuse, Peekable, FromIterator};
use std::iter::{Fuse, Peekable, FromIterator, FusedIterator};
use std::marker::PhantomData;
use crate::size_hint;

Expand Down Expand Up @@ -310,13 +310,13 @@ pub fn cartesian_product<I, J>(mut i: I, j: J) -> Product<I, J>
}
}


impl<I, J> Iterator for Product<I, J>
where I: Iterator,
J: Clone + Iterator,
I::Item: Clone
{
type Item = (I::Item, J::Item);

fn next(&mut self) -> Option<(I::Item, J::Item)> {
let elt_b = match self.b.next() {
None => {
Expand Down Expand Up @@ -607,18 +607,18 @@ impl<I, J, F> Iterator for MergeBy<I, J, F>
}

#[derive(Clone, Debug)]
pub struct CoalesceCore<I>
pub struct CoalesceCore<I, T>
where I: Iterator
{
iter: I,
last: Option<I::Item>,
last: Option<T>,
}

impl<I> CoalesceCore<I>
impl<I, T> CoalesceCore<I, T>
where I: Iterator
{
fn next_with<F>(&mut self, mut f: F) -> Option<I::Item>
where F: FnMut(I::Item, I::Item) -> Result<I::Item, (I::Item, I::Item)>
fn next_with<F>(&mut self, mut f: F) -> Option<T>
where F: FnMut(T, I::Item) -> Result<T, (T, T)>
{
// this fuses the iterator
let mut last = match self.last.take() {
Expand Down Expand Up @@ -652,7 +652,7 @@ impl<I> CoalesceCore<I>
pub struct Coalesce<I, F>
where I: Iterator
{
iter: CoalesceCore<I>,
iter: CoalesceCore<I, I::Item>,
f: F,
}

Expand Down Expand Up @@ -705,7 +705,7 @@ impl<I, F> Iterator for Coalesce<I, F>
pub struct DedupBy<I, Pred>
where I: Iterator
{
iter: CoalesceCore<I>,
iter: CoalesceCore<I, I::Item>,
dedup_pred: Pred,
}

Expand All @@ -718,7 +718,7 @@ pub struct DedupEq;

impl<T: PartialEq> DedupPredicate<T> for DedupEq {
fn dedup_pair(&mut self, a: &T, b: &T) -> bool {
a==b
a == b
}
}

Expand Down Expand Up @@ -803,6 +803,79 @@ impl<I, Pred> Iterator for DedupBy<I, Pred>
}
}

/// An iterator adaptor that collapses each run of consecutive duplicate elements into a
/// single element, paired with the number of elements that were in that run. Whether two
/// neighbouring elements are duplicates is decided by the `Pred` comparison predicate.
///
/// The iterator element type is `(usize, I::Item)`: the run length followed by the element
/// kept for that run.
///
/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or
/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct DedupByWithCount<I, Pred>
where I: Iterator
{
// Underlying iterator plus the pending `(count, element)` pair of the run in progress.
iter: CoalesceCore<I, (usize, I::Item)>,
// Predicate consulted to decide whether two consecutive elements belong to the same run.
dedup_pred: Pred,
}

/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many
/// repeated elements were present.
///
/// This is `DedupByWithCount` specialised to the `DedupEq` predicate, so consecutive
/// elements are compared with `==`.
///
/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
pub type DedupWithCount<I> = DedupByWithCount<I, DedupEq>;

/// Create a new `DedupByWithCount`.
///
/// The first element of `iter` (if any) is pulled immediately and stored with a count of
/// one, so it can seed the first run; `dedup_pred` is kept for the later comparisons.
pub fn dedup_by_with_count<I, Pred>(mut iter: I, dedup_pred: Pred) -> DedupByWithCount<I, Pred>
    where I: Iterator,
{
    // Prime the adaptor before `iter` is moved into the core.
    let first = iter.next();
    let last = first.map(|elt| (1, elt));

    DedupByWithCount {
        iter: CoalesceCore { iter, last },
        dedup_pred,
    }
}

/// Create a new `DedupWithCount`.
///
/// Equivalent to `dedup_by_with_count` with the `DedupEq` predicate.
pub fn dedup_with_count<I>(iter: I) -> DedupWithCount<I>
    where I: Iterator
{
    let eq_pred = DedupEq;
    dedup_by_with_count(iter, eq_pred)
}

/// `Debug` output for `DedupByWithCount`.
///
/// Only the `iter` field is printed; `Pred` has no `Debug` bound in this impl, so the
/// predicate is left out.
impl<I, Pred> fmt::Debug for DedupByWithCount<I, Pred>
    where I: Iterator + fmt::Debug,
          I::Item: fmt::Debug,
{
    // Label the output with this type's own name so it is not mistaken for another adaptor.
    debug_fmt_fields!(DedupByWithCount, iter);
}

/// Cloning duplicates both the wrapped iterator state (including the pending
/// `(count, element)` pair, hence the `I::Item: Clone` bound) and the predicate.
impl<I, Pred> Clone for DedupByWithCount<I, Pred>
    where I: Iterator + Clone,
          I::Item: Clone,
          Pred: Clone,
{
    clone_fields!(iter, dedup_pred);
}

impl<I, Pred> Iterator for DedupByWithCount<I, Pred>
    where I: Iterator,
          Pred: DedupPredicate<I::Item>,
{
    type Item = (usize, I::Item);

    /// Yield the next `(run length, element)` pair.
    fn next(&mut self) -> Option<Self::Item> {
        // Borrow the predicate separately so the closure does not need all of `self`.
        let pred = &mut self.dedup_pred;
        self.iter.next_with(|(count, kept), incoming| {
            if pred.dedup_pair(&kept, &incoming) {
                // Same run: keep the stored element and bump its count.
                Ok((count + 1, kept))
            } else {
                // Run ended: hand back the finished pair and start a new run at one.
                Err(((count, kept), (1, incoming)))
            }
        })
    }

    /// Delegates to the size hint of the underlying coalescing core.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

// Marked fused because `CoalesceCore::next_with` fuses the wrapped iterator.
impl<I, Pred> FusedIterator for DedupByWithCount<I, Pred>
    where I: Iterator,
          Pred: DedupPredicate<I::Item>,
{}

/// An iterator adaptor that borrows from a `Clone`-able iterator
/// to only pick off elements while the predicate returns `true`.
///
Expand Down
49 changes: 47 additions & 2 deletions src/lib.rs
Expand Up @@ -78,6 +78,8 @@ pub mod structs {
pub use crate::adaptors::{
Dedup,
DedupBy,
DedupWithCount,
DedupByWithCount,
Interleave,
InterleaveShortest,
Product,
Expand Down Expand Up @@ -827,7 +829,6 @@ pub trait Itertools : Iterator {
merge_join_by(self, other, cmp_fn)
}


/// Return an iterator adaptor that flattens an iterator of iterators by
/// merging them in ascending order.
///
Expand Down Expand Up @@ -1008,7 +1009,7 @@ pub trait Itertools : Iterator {
/// use itertools::Itertools;
///
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1),
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1),
/// vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]);
/// ```
fn dedup_by<Cmp>(self, cmp: Cmp) -> DedupBy<Self, Cmp>
Expand All @@ -1018,6 +1019,50 @@ pub trait Itertools : Iterator {
adaptors::dedup_by(self, cmp)
}

/// Collapse each section of consecutive identical elements into a single element, paired
/// with a count of how many elements that section contained.
/// If the iterator is sorted, all elements will be unique.
///
/// Iterator element type is `(usize, Self::Item)`, where the `usize` is the length of the
/// run (at least 1) and comes first.
///
/// Elements are compared with `==`; the returned adaptor only implements `Iterator` when
/// `Self::Item: PartialEq`.
///
/// This iterator is *fused*.
///
/// ```
/// use itertools::Itertools;
///
/// let data = vec![1., 1., 2., 3., 3., 2., 2.];
/// itertools::assert_equal(data.into_iter().dedup_with_count(),
/// vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]);
/// ```
fn dedup_with_count(self) -> DedupWithCount<Self>
where Self: Sized,
{
adaptors::dedup_with_count(self)
}

/// Collapse each section of consecutive duplicate elements into a single element, paired
/// with a count of how many elements that section contained.
/// Whether two neighbouring elements are duplicates is decided by the comparison function
/// `cmp`, which should return `true` when they are to be treated as equal.
/// If the iterator is sorted, all elements will be unique.
///
/// Iterator element type is `(usize, Self::Item)`, where the `usize` is the length of the
/// run (at least 1) and comes first.
///
/// This iterator is *fused*.
///
/// ```
/// use itertools::Itertools;
///
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
/// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1),
/// vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]);
/// ```
fn dedup_by_with_count<Cmp>(self, cmp: Cmp) -> DedupByWithCount<Self, Cmp>
where Self: Sized,
Cmp: FnMut(&Self::Item, &Self::Item) -> bool,
{
adaptors::dedup_by_with_count(self, cmp)
}

/// Return an iterator adaptor that filters out elements that have
/// already been produced once during the iteration. Duplicates
/// are detected using hash and equality.
Expand Down
27 changes: 27 additions & 0 deletions tests/test_std.rs
Expand Up @@ -115,6 +115,33 @@ fn dedup_by() {
assert_eq!(&xs_d, &ys);
}

#[test]
fn dedup_with_count() {
    // Runs of different lengths, including a value (1) that shows up again after a gap.
    let input: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3];
    let expected: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)];
    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());

    // One run spanning the whole input collapses into a single counted element.
    let input: [i32; 5] = [0, 0, 0, 0, 0];
    let expected: [(usize, &i32); 1] = [(5, &0)];
    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());
}


#[test]
fn dedup_by_with_count() {
    // Duplicates are judged on the second tuple component only.
    let pairs = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)];
    let expected = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))];
    it::assert_equal(
        expected.iter().cloned(),
        pairs.iter().dedup_by_with_count(|a, b| a.1 == b.1),
    );

    // Every first component matches, so the whole input forms one run.
    let pairs = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
    let expected = [(5, &(0, 1))];
    it::assert_equal(
        expected.iter().cloned(),
        pairs.iter().dedup_by_with_count(|a, b| a.0 == b.0),
    );
}

#[test]
fn all_equal() {
assert!("".chars().all_equal());
Expand Down

0 comments on commit 2d22ff9

Please sign in to comment.