Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Itertools::dedup_with_count() and Itertools::dedup_by_with_count() #423

Merged
merged 2 commits into from May 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
93 changes: 83 additions & 10 deletions src/adaptors/mod.rs
Expand Up @@ -10,7 +10,7 @@ pub use self::multi_product::*;

use std::fmt;
use std::mem::replace;
use std::iter::{Fuse, Peekable, FromIterator};
use std::iter::{Fuse, Peekable, FromIterator, FusedIterator};
use std::marker::PhantomData;
use crate::size_hint;

Expand Down Expand Up @@ -310,13 +310,13 @@ pub fn cartesian_product<I, J>(mut i: I, j: J) -> Product<I, J>
}
}


impl<I, J> Iterator for Product<I, J>
where I: Iterator,
J: Clone + Iterator,
I::Item: Clone
{
type Item = (I::Item, J::Item);

fn next(&mut self) -> Option<(I::Item, J::Item)> {
let elt_b = match self.b.next() {
None => {
Expand Down Expand Up @@ -607,18 +607,18 @@ impl<I, J, F> Iterator for MergeBy<I, J, F>
}

#[derive(Clone, Debug)]
pub struct CoalesceCore<I>
pub struct CoalesceCore<I, T>
where I: Iterator
{
iter: I,
last: Option<I::Item>,
last: Option<T>,
}

impl<I> CoalesceCore<I>
impl<I, T> CoalesceCore<I, T>
where I: Iterator
{
fn next_with<F>(&mut self, mut f: F) -> Option<I::Item>
where F: FnMut(I::Item, I::Item) -> Result<I::Item, (I::Item, I::Item)>
fn next_with<F>(&mut self, mut f: F) -> Option<T>
where F: FnMut(T, I::Item) -> Result<T, (T, T)>
{
// this fuses the iterator
let mut last = match self.last.take() {
Expand Down Expand Up @@ -652,7 +652,7 @@ impl<I> CoalesceCore<I>
pub struct Coalesce<I, F>
where I: Iterator
{
iter: CoalesceCore<I>,
iter: CoalesceCore<I, I::Item>,
f: F,
}

Expand Down Expand Up @@ -705,7 +705,7 @@ impl<I, F> Iterator for Coalesce<I, F>
pub struct DedupBy<I, Pred>
where I: Iterator
{
iter: CoalesceCore<I>,
iter: CoalesceCore<I, I::Item>,
dedup_pred: Pred,
}

Expand All @@ -718,7 +718,7 @@ pub struct DedupEq;

impl<T: PartialEq> DedupPredicate<T> for DedupEq {
fn dedup_pair(&mut self, a: &T, b: &T) -> bool {
a==b
a == b
}
}

Expand Down Expand Up @@ -803,6 +803,79 @@ impl<I, Pred> Iterator for DedupBy<I, Pred>
}
}

/// An iterator adaptor that collapses each run of consecutive duplicates into a single
/// `(count, item)` pair, where `count` is the number of elements that were in the run.
/// Whether two neighbouring elements are duplicates is decided by a comparison predicate.
///
/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or
/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct DedupByWithCount<I, Pred>
jswrenn marked this conversation as resolved.
Show resolved Hide resolved
where I: Iterator
{
// Shared coalescing state: the source iterator plus the pending `(count, item)` pair.
iter: CoalesceCore<I, (usize, I::Item)>,
// Predicate that decides whether two neighbouring items belong to the same run.
dedup_pred: Pred,
}

/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many
/// repeated elements were present.
///
/// This is `DedupByWithCount` specialised to the `DedupEq` predicate, which treats two
/// neighbouring elements as duplicates when they compare equal with `==`.
///
/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
pub type DedupWithCount<I> = DedupByWithCount<I, DedupEq>;

/// Build a `DedupByWithCount` over `iter`, using `dedup_pred` to decide which
/// neighbouring elements count as duplicates.
pub fn dedup_by_with_count<I, Pred>(mut iter: I, dedup_pred: Pred) -> DedupByWithCount<I, Pred>
    where I: Iterator,
{
    // Pull the first element eagerly; it opens the first run with a count of one.
    let last = iter.next().map(|first| (1, first));
    let core = CoalesceCore { last, iter };
    DedupByWithCount { iter: core, dedup_pred }
}

/// Create a new `DedupWithCount`.
///
/// This forwards to `dedup_by_with_count` with `DedupEq` as the predicate, so neighbouring
/// elements are treated as duplicates when they compare equal with `==`.
pub fn dedup_with_count<I>(iter: I) -> DedupWithCount<I>
where I: Iterator
{
dedup_by_with_count(iter, DedupEq)
}

/// Debug formatting for `DedupByWithCount`, showing its inner `iter` state.
///
/// The predicate is not among the listed fields, and `Pred` is not required to
/// implement `Debug`.
impl<I, Pred> fmt::Debug for DedupByWithCount<I, Pred>
    where I: Iterator + fmt::Debug,
          I::Item: fmt::Debug,
{
    // Label the output with this adaptor's own name instead of `Dedup`, which names a
    // different adaptor.
    // NOTE(review): assumes the macro's first argument is the type name that gets printed.
    debug_fmt_fields!(DedupByWithCount, iter);
}

/// `DedupByWithCount` can be cloned when the source iterator, its items (one may be
/// held as the pending run) and the predicate can all be cloned.
impl<I, Pred> Clone for DedupByWithCount<I, Pred>
    where I: Iterator + Clone,
          I::Item: Clone,
          Pred: Clone,
{
    clone_fields!(iter, dedup_pred);
}

/// Yields one `(count, item)` pair per run of consecutive duplicates.
impl<I, Pred> Iterator for DedupByWithCount<I, Pred>
    where I: Iterator,
          Pred: DedupPredicate<I::Item>,
{
    type Item = (usize, I::Item);

    /// Returns the next run as `(count, item)`, where `item` is the element that
    /// started the run and `count` is how many elements the run contained.
    fn next(&mut self) -> Option<(usize, I::Item)> {
        // Borrow the predicate on its own so the closure does not need all of `self`
        // while `self.iter` is mutably borrowed by `next_with`.
        let dedup_pred = &mut self.dedup_pred;
        self.iter.next_with(|(count, run_head), candidate| {
            if dedup_pred.dedup_pair(&run_head, &candidate) {
                // Duplicate: keep the run's first element and grow its count.
                Ok((count + 1, run_head))
            } else {
                // Different element: hand back the finished run together with a
                // fresh run of length one that starts at `candidate`.
                Err(((count, run_head), (1, candidate)))
            }
        })
    }

    /// Forwards the size hint reported by the underlying `CoalesceCore`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

// `next` is driven entirely by `CoalesceCore::next_with`, which is commented as fusing the
// iterator; this marker impl exposes that property to callers.
impl<I: Iterator, Pred: DedupPredicate<I::Item>> FusedIterator for DedupByWithCount<I, Pred> {}

/// An iterator adaptor that borrows from a `Clone`-able iterator
/// to only pick off elements while the predicate returns `true`.
///
Expand Down
49 changes: 47 additions & 2 deletions src/lib.rs
Expand Up @@ -78,6 +78,8 @@ pub mod structs {
pub use crate::adaptors::{
Dedup,
DedupBy,
DedupWithCount,
DedupByWithCount,
Interleave,
InterleaveShortest,
Product,
Expand Down Expand Up @@ -827,7 +829,6 @@ pub trait Itertools : Iterator {
merge_join_by(self, other, cmp_fn)
}


/// Return an iterator adaptor that flattens an iterator of iterators by
/// merging them in ascending order.
///
Expand Down Expand Up @@ -1008,7 +1009,7 @@ pub trait Itertools : Iterator {
/// use itertools::Itertools;
///
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1),
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1),
/// vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]);
/// ```
fn dedup_by<Cmp>(self, cmp: Cmp) -> DedupBy<Self, Cmp>
Expand All @@ -1018,6 +1019,50 @@ pub trait Itertools : Iterator {
adaptors::dedup_by(self, cmp)
}

/// Collapse each section of consecutive identical elements into a single element, paired
/// with the number of elements that were in that section.
/// Only neighbouring elements are compared, so a value may appear again later in the
/// output; if the iterator is sorted, all elements will be unique.
///
/// Iterator element type is `(usize, Self::Item)`: the length of the section, followed by
/// the first element of the section.
///
/// This iterator is *fused*.
///
/// ```
/// use itertools::Itertools;
///
/// let data = vec![1., 1., 2., 3., 3., 2., 2.];
/// itertools::assert_equal(data.into_iter().dedup_with_count(),
/// vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]);
/// ```
fn dedup_with_count(self) -> DedupWithCount<Self>
where Self: Sized,
{
adaptors::dedup_with_count(self)
}

/// Collapse each section of consecutive duplicate elements into a single element, paired
/// with the number of elements that were in that section.
/// Two neighbouring elements are duplicates when the comparison function `cmp` returns
/// `true` for them.
/// Only neighbouring elements are compared, so a value may appear again later in the
/// output; if the iterator is sorted, all elements will be unique.
///
/// Iterator element type is `(usize, Self::Item)`: the length of the section, followed by
/// the first element of the section.
///
/// This iterator is *fused*.
///
/// ```
/// use itertools::Itertools;
///
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
/// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1),
/// vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]);
/// ```
fn dedup_by_with_count<Cmp>(self, cmp: Cmp) -> DedupByWithCount<Self, Cmp>
where Self: Sized,
Cmp: FnMut(&Self::Item, &Self::Item) -> bool,
{
adaptors::dedup_by_with_count(self, cmp)
}

/// Return an iterator adaptor that filters out elements that have
/// already been produced once during the iteration. Duplicates
/// are detected using hash and equality.
Expand Down
27 changes: 27 additions & 0 deletions tests/test_std.rs
Expand Up @@ -115,6 +115,33 @@ fn dedup_by() {
assert_eq!(&xs_d, &ys);
}

#[test]
fn dedup_with_count() {
    // Mixed input: runs of different lengths, with `1` appearing in two separate runs.
    let input: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3];
    let expected: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)];
    let counted = input.iter().dedup_with_count();
    it::assert_equal(expected.iter().cloned(), counted);

    // Uniform input: everything collapses into a single run of five.
    let input: [i32; 5] = [0, 0, 0, 0, 0];
    let expected: [(usize, &i32); 1] = [(5, &0)];
    let counted = input.iter().dedup_with_count();
    it::assert_equal(expected.iter().cloned(), counted);
}


#[test]
fn dedup_by_with_count() {
    // Runs are decided by the second tuple field only.
    let input = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)];
    let expected = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))];
    let by_second = input.iter().dedup_by_with_count(|a, b| a.1 == b.1);
    it::assert_equal(expected.iter().cloned(), by_second);

    // Every first field is equal, so the whole input forms one run of five.
    let input = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
    let expected = [(5, &(0, 1))];
    let by_first = input.iter().dedup_by_with_count(|a, b| a.0 == b.0);
    it::assert_equal(expected.iter().cloned(), by_first);
}

#[test]
fn all_equal() {
assert!("".chars().all_equal());
Expand Down