diff --git a/src/adaptors/mod.rs b/src/adaptors/mod.rs
index 7d61f117c..53775027b 100644
--- a/src/adaptors/mod.rs
+++ b/src/adaptors/mod.rs
@@ -10,7 +10,7 @@ pub use self::multi_product::*;
 
 use std::fmt;
 use std::mem::replace;
-use std::iter::{Fuse, Peekable, FromIterator};
+use std::iter::{Fuse, Peekable, FromIterator, FusedIterator};
 use std::marker::PhantomData;
 use crate::size_hint;
 
@@ -803,6 +803,88 @@ impl<I, Pred> Iterator for DedupBy<I, Pred>
     }
 }
 
+/// An iterator adaptor that collapses each run of consecutive duplicates into a single element,
+/// paired with the length of that run. Equality is determined by a comparison function.
+///
+/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or
+/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct DedupByWithCount<I, Pred>
+    where I: Iterator
+{
+    iter: Peekable<I>, // peekable so a run can be scanned without consuming the next run's head
+    dedup_pred: Pred,
+}
+
+/// An iterator adaptor that collapses each run of consecutive duplicates into a single element,
+/// paired with the length of that run.
+///
+/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
+pub type DedupWithCount<I> = DedupByWithCount<I, DedupEq>;
+
+/// Create a new `DedupByWithCount` that wraps `iter` and compares neighbours with `dedup_pred`.
+pub fn dedup_by_with_count<I, Pred>(iter: I, dedup_pred: Pred) -> DedupByWithCount<I, Pred>
+    where I: Iterator,
+{
+    DedupByWithCount {
+        iter: iter.peekable(),
+        dedup_pred,
+    }
+}
+
+/// Create a new `DedupWithCount`.
+pub fn dedup_with_count<I>(iter: I) -> DedupWithCount<I>
+    where I: Iterator
+{
+    dedup_by_with_count(iter, DedupEq)
+}
+
+impl<I, Pred> fmt::Debug for DedupByWithCount<I, Pred>
+    where I: Iterator + fmt::Debug,
+          I::Item: fmt::Debug,
+{
+    debug_fmt_fields!(DedupByWithCount, iter);
+}
+
+impl<I: Clone, Pred: Clone> Clone for DedupByWithCount<I, Pred>
+    where I: Iterator,
+          I::Item: Clone,
+{
+    clone_fields!(iter, dedup_pred);
+}
+
+impl<I, Pred> Iterator for DedupByWithCount<I, Pred>
+    where I: Iterator,
+          Pred: DedupPredicate<I::Item>,
+{
+    type Item = (usize, I::Item);
+
+    fn next(&mut self) -> Option<(usize, I::Item)> {
+        self.iter.next().map(|base| {
+            let mut count: usize = 1; // `base` is the first member of its own run
+
+            while let Some(next) = self.iter.peek() {
+                if self.dedup_pred.dedup_pair(&base, next) {
+                    self.iter.next(); // duplicate of `base`: consume it and count it
+                    count += 1;
+                } else {
+                    break; // leave the head of the following run unconsumed
+                }
+            }
+
+            (count, base)
+        })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (low, high) = self.iter.size_hint();
+
+        ((low > 0) as usize, high) // non-empty input gives >= 1 run; never more runs than elements
+    }
+}
+// Fused only when `I` is: `Peekable` polls a non-fused source again after it has yielded `None`.
+impl<I: FusedIterator, Pred: DedupPredicate<I::Item>> FusedIterator for DedupByWithCount<I, Pred> {}
+
 /// An iterator adaptor that borrows from a `Clone`-able iterator
 /// to only pick off elements while the predicate returns `true`.
 ///
diff --git a/src/lib.rs b/src/lib.rs
index b8daefda6..b3559b116 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -78,6 +78,8 @@ pub mod structs {
     pub use crate::adaptors::{
         Dedup,
         DedupBy,
+        DedupWithCount,
+        DedupByWithCount,
         Interleave,
         InterleaveShortest,
         Product,
@@ -970,7 +972,7 @@ pub trait Itertools : Iterator {
     /// use itertools::Itertools;
     ///
     /// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
-    /// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1),
+    /// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1),
     ///                         vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]);
     /// ```
     fn dedup_by<Cmp>(self, cmp: Cmp) -> DedupBy<Self, Cmp>
@@ -980,6 +982,50 @@ pub trait Itertools : Iterator {
         adaptors::dedup_by(self, cmp)
     }
 
+    /// Remove duplicates from sections of consecutive identical elements, while keeping a count of
+    /// how many repeated elements were present.
+    /// If the iterator is sorted, all elements will be unique.
+    ///
+    /// Iterator element type is `(usize, Self::Item)`: the run length, then the run's first element.
+    ///
+    /// This iterator is *fused* if the underlying iterator is fused.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    ///
+    /// let data = vec![1., 1., 2., 3., 3., 2., 2.];
+    /// itertools::assert_equal(data.into_iter().dedup_with_count(),
+    ///                         vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]);
+    /// ```
+    fn dedup_with_count(self) -> DedupWithCount<Self>
+        where Self: Sized,
+    {
+        adaptors::dedup_with_count(self)
+    }
+
+    /// Remove duplicates from sections of consecutive identical elements, while keeping a count of
+    /// how many repeated elements were present.
+    /// This will determine equality using a comparison function.
+    /// If the iterator is sorted, all elements will be unique.
+    ///
+    /// Iterator element type is `(usize, Self::Item)`: the run length, then the run's first element.
+    ///
+    /// This iterator is *fused* if the underlying iterator is fused.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    ///
+    /// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
+    /// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1),
+    ///                         vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]);
+    /// ```
+    fn dedup_by_with_count<Cmp>(self, cmp: Cmp) -> DedupByWithCount<Self, Cmp>
+        where Self: Sized,
+              Cmp: FnMut(&Self::Item, &Self::Item) -> bool,
+    {
+        adaptors::dedup_by_with_count(self, cmp)
+    }
+
     /// Return an iterator adaptor that filters out elements that have
     /// already been produced once during the iteration. Duplicates
     /// are detected using hash and equality.
diff --git a/tests/test_std.rs b/tests/test_std.rs
index ba077848c..7a3d2943d 100644
--- a/tests/test_std.rs
+++ b/tests/test_std.rs
@@ -114,6 +114,32 @@ fn dedup_by() {
     assert_eq!(&xs_d, &ys);
 }
 
+#[test]
+fn dedup_with_count() {
+    // Each run of equal neighbours collapses into `(run length, first element of the run)`.
+    let input: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3];
+    let expected: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)];
+    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());
+
+    // One run spanning the whole input produces exactly one pair.
+    let input: [i32; 5] = [0, 0, 0, 0, 0];
+    let expected: [(usize, &i32); 1] = [(5, &0)];
+    it::assert_equal(expected.iter().cloned(), input.iter().dedup_with_count());
+}
+
+#[test]
+fn dedup_by_with_count() {
+    // Neighbours are compared on the second tuple field only.
+    let input = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)];
+    let expected = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))];
+    it::assert_equal(expected.iter().cloned(), input.iter().dedup_by_with_count(|a, b| a.1 == b.1));
+
+    // Neighbours are compared on the first tuple field, which is constant in this input.
+    let input = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
+    let expected = [(5, &(0, 1))];
+    it::assert_equal(expected.iter().cloned(), input.iter().dedup_by_with_count(|a, b| a.0 == b.0));
+}
+
 #[test]
 fn all_equal() {
     assert!("".chars().all_equal());