diff --git a/src/adaptors/mod.rs b/src/adaptors/mod.rs index 7d61f117c..4beaa26ae 100644 --- a/src/adaptors/mod.rs +++ b/src/adaptors/mod.rs @@ -10,7 +10,7 @@ pub use self::multi_product::*; use std::fmt; use std::mem::replace; -use std::iter::{Fuse, Peekable, FromIterator}; +use std::iter::{Fuse, Peekable, FromIterator, FusedIterator}; use std::marker::PhantomData; use crate::size_hint; @@ -310,13 +310,13 @@ pub fn cartesian_product(mut i: I, j: J) -> Product } } - impl Iterator for Product where I: Iterator, J: Clone + Iterator, I::Item: Clone { type Item = (I::Item, J::Item); + fn next(&mut self) -> Option<(I::Item, J::Item)> { let elt_b = match self.b.next() { None => { @@ -607,18 +607,18 @@ impl Iterator for MergeBy } #[derive(Clone, Debug)] -pub struct CoalesceCore +pub struct CoalesceCore where I: Iterator { iter: I, - last: Option, + last: Option, } -impl CoalesceCore +impl CoalesceCore where I: Iterator { - fn next_with(&mut self, mut f: F) -> Option - where F: FnMut(I::Item, I::Item) -> Result + fn next_with(&mut self, mut f: F) -> Option + where F: FnMut(T, I::Item) -> Result { // this fuses the iterator let mut last = match self.last.take() { @@ -652,7 +652,7 @@ impl CoalesceCore pub struct Coalesce where I: Iterator { - iter: CoalesceCore, + iter: CoalesceCore, f: F, } @@ -705,7 +705,7 @@ impl Iterator for Coalesce pub struct DedupBy where I: Iterator { - iter: CoalesceCore, + iter: CoalesceCore, dedup_pred: Pred, } @@ -718,7 +718,7 @@ pub struct DedupEq; impl DedupPredicate for DedupEq { fn dedup_pair(&mut self, a: &T, b: &T) -> bool { - a==b + a == b } } @@ -803,6 +803,79 @@ impl Iterator for DedupBy } } +/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many +/// repeated elements were present. This will determine equality using a comparison function. 
+/// +/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or +/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information. +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +pub struct DedupByWithCount + where I: Iterator +{ + iter: CoalesceCore, + dedup_pred: Pred, +} + +/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many +/// repeated elements were present. +/// +/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information. +pub type DedupWithCount = DedupByWithCount; + +/// Create a new `DedupByWithCount`. +pub fn dedup_by_with_count(mut iter: I, dedup_pred: Pred) -> DedupByWithCount + where I: Iterator, +{ + DedupByWithCount { + iter: CoalesceCore { + last: iter.next().map(|v| (1, v)), + iter, + }, + dedup_pred, + } +} + +/// Create a new `DedupWithCount`. +pub fn dedup_with_count(iter: I) -> DedupWithCount + where I: Iterator +{ + dedup_by_with_count(iter, DedupEq) +} + +impl fmt::Debug for DedupByWithCount + where I: Iterator + fmt::Debug, + I::Item: fmt::Debug, +{ + debug_fmt_fields!(DedupByWithCount, iter); +} + +impl Clone for DedupByWithCount + where I: Iterator, + I::Item: Clone, +{ + clone_fields!(iter, dedup_pred); +} + +impl Iterator for DedupByWithCount + where I: Iterator, + Pred: DedupPredicate, +{ + type Item = (usize, I::Item); + + fn next(&mut self) -> Option<(usize, I::Item)> { + let dedup_pred = &mut self.dedup_pred; + self.iter.next_with(|(c, x), y| { + if dedup_pred.dedup_pair(&x, &y) { Ok((c + 1, x)) } else { Err(((c, x), (1, y))) } + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl> FusedIterator for DedupByWithCount {} + /// An iterator adaptor that borrows from a `Clone`-able iterator /// to only pick off elements while the predicate returns `true`. 
/// diff --git a/src/lib.rs b/src/lib.rs index 3f0155750..7a7abb2a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,8 @@ pub mod structs { pub use crate::adaptors::{ Dedup, DedupBy, + DedupWithCount, + DedupByWithCount, Interleave, InterleaveShortest, Product, @@ -827,7 +829,6 @@ pub trait Itertools : Iterator { merge_join_by(self, other, cmp_fn) } - /// Return an iterator adaptor that flattens an iterator of iterators by /// merging them in ascending order. /// @@ -1008,7 +1009,7 @@ pub trait Itertools : Iterator { /// use itertools::Itertools; /// /// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)]; - /// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1), + /// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1), /// vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]); /// ``` fn dedup_by(self, cmp: Cmp) -> DedupBy @@ -1018,6 +1019,50 @@ pub trait Itertools : Iterator { adaptors::dedup_by(self, cmp) } + /// Remove duplicates from sections of consecutive identical elements, while keeping a count of + /// how many repeated elements were present. + /// If the iterator is sorted, all elements will be unique. + /// + /// Iterator element type is `(usize, Self::Item)`. + /// + /// This iterator is *fused*. + /// + /// ``` + /// use itertools::Itertools; + /// + /// let data = vec![1., 1., 2., 3., 3., 2., 2.]; + /// itertools::assert_equal(data.into_iter().dedup_with_count(), + /// vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]); + /// ``` + fn dedup_with_count(self) -> DedupWithCount + where Self: Sized, + { + adaptors::dedup_with_count(self) + } + + /// Remove duplicates from sections of consecutive identical elements, while keeping a count of + /// how many repeated elements were present. + /// This will determine equality using a comparison function. + /// If the iterator is sorted, all elements will be unique. + /// + /// Iterator element type is `(usize, Self::Item)`. + /// + /// This iterator is *fused*. 
+ /// + /// ``` + /// use itertools::Itertools; + /// + /// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)]; + /// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1), + /// vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]); + /// ``` + fn dedup_by_with_count(self, cmp: Cmp) -> DedupByWithCount + where Self: Sized, + Cmp: FnMut(&Self::Item, &Self::Item) -> bool, + { + adaptors::dedup_by_with_count(self, cmp) + } + /// Return an iterator adaptor that filters out elements that have /// already been produced once during the iteration. Duplicates /// are detected using hash and equality. diff --git a/tests/test_std.rs b/tests/test_std.rs index cbf8f064c..23e1911e4 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -115,6 +115,33 @@ fn dedup_by() { assert_eq!(&xs_d, &ys); } +#[test] +fn dedup_with_count() { + let xs: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3]; + let ys: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)]; + + it::assert_equal(ys.iter().cloned(), xs.iter().dedup_with_count()); + + let xs: [i32; 5] = [0, 0, 0, 0, 0]; + let ys: [(usize, &i32); 1] = [(5, &0)]; + + it::assert_equal(ys.iter().cloned(), xs.iter().dedup_with_count()); +} + + +#[test] +fn dedup_by_with_count() { + let xs = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)]; + let ys = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))]; + + it::assert_equal(ys.iter().cloned(), xs.iter().dedup_by_with_count(|x, y| x.1 == y.1)); + + let xs = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]; + let ys = [(5, &(0, 1))]; + + it::assert_equal(ys.iter().cloned(), xs.iter().dedup_by_with_count(|x, y| x.0 == y.0)); +} + #[test] fn all_equal() { assert!("".chars().all_equal());