diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 5a84241359b..8aae45b2a45 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -117,12 +117,7 @@ impl BinaryArray { /// Returns an iterator of `Option<&[u8]>` over every element of this array. pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter, BitmapIter> { - let null_count = self.validity.as_ref().map(|validity| validity.unset_bits()); - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - null_count, - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref()) } /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity diff --git a/src/array/boolean/iterator.rs b/src/array/boolean/iterator.rs index 9beb53b1dbb..8243a8d985f 100644 --- a/src/array/boolean/iterator.rs +++ b/src/array/boolean/iterator.rs @@ -22,9 +22,9 @@ impl IntoIterator for BooleanArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); - let null_count = validity.as_ref().map(|validity| validity.unset_bits()); - let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity, null_count) + let validity = + validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter())); + ZipValidity::new(values, validity) } } @@ -42,14 +42,9 @@ impl<'a> MutableBooleanArray { /// Returns an iterator over the optional values of this [`MutableBooleanArray`]. #[inline] pub fn iter(&'a self) -> ZipValidity, BitmapIter<'a>> { - let null_count = self - .validity() - .as_ref() - .map(|validity| validity.unset_bits()); ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), - null_count, ) } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 816e901144e..a3788b9b856 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -88,13 +88,7 @@ impl BooleanArray { /// Returns an iterator over the optional values of this [`BooleanArray`]. #[inline] pub fn iter(&self) -> ZipValidity { - ZipValidity::new( - self.values().iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values().iter(), self.validity()) } /// Returns an iterator over the values of this [`BooleanArray`]. diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 81d2eec354d..8b62c91dce9 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -191,14 +191,7 @@ impl DictionaryArray { /// This function will allocate a new [`Scalar`] per item and is usually not performant. /// Consider calling `keys_iter` and `values`, downcasting `values`, and iterating over that. pub fn iter(&self) -> ZipValidity, DictionaryValuesIter, BitmapIter> { - ZipValidity::new( - DictionaryValuesIter::new(self), - self.keys.validity().as_ref().map(|x| x.iter()), - self.keys - .validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(DictionaryValuesIter::new(self), self.keys.validity()) } /// Returns an iterator of [`Box`] diff --git a/src/array/fixed_size_binary/iterator.rs b/src/array/fixed_size_binary/iterator.rs index 2e4acaf60f2..0d456e0ba35 100644 --- a/src/array/fixed_size_binary/iterator.rs +++ b/src/array/fixed_size_binary/iterator.rs @@ -19,11 +19,7 @@ impl<'a> FixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity()) } /// Returns iterator over the values of [`FixedSizeBinaryArray`] @@ -46,13 +42,7 @@ impl<'a> MutableFixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> { - ZipValidity::new( - self.iter_values(), - self.validity().as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new(self.iter_values(), self.validity().map(|x| x.iter())) } /// Returns iterator over the values of [`MutableFixedSizeBinaryArray`] diff --git a/src/array/fixed_size_list/iterator.rs b/src/array/fixed_size_list/iterator.rs index 046050d5671..2f2b70b2eff 100644 --- a/src/array/fixed_size_list/iterator.rs +++ b/src/array/fixed_size_list/iterator.rs @@ -36,13 +36,7 @@ impl<'a> IntoIterator for &'a FixedSizeListArray { impl<'a> FixedSizeListArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a> { - ZipValidity::new( - FixedSizeListValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(FixedSizeListValuesIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/list/iterator.rs b/src/array/list/iterator.rs index 8fff5fa8204..d0b7d22455b 100644 --- a/src/array/list/iterator.rs +++ b/src/array/list/iterator.rs @@ -35,13 +35,7 @@ impl<'a, O: Offset> IntoIterator for &'a ListArray { impl<'a, O: Offset> ListArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a, O> { - ZipValidity::new( - ListValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(ListValuesIter::new(self), self.validity.as_ref()) } /// Returns an iterator of `Box` diff --git a/src/array/map/iterator.rs b/src/array/map/iterator.rs index c20324a5d1e..5867372c886 100644 --- a/src/array/map/iterator.rs +++ b/src/array/map/iterator.rs @@ -72,11 +72,7 @@ impl<'a> IntoIterator for &'a MapArray { impl<'a> MapArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipValidity, MapValuesIter<'a>, BitmapIter<'a>> { - ZipValidity::new( - MapValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|x| x.unset_bits()), - ) + ZipValidity::new_with_validity(MapValuesIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/primitive/iterator.rs b/src/array/primitive/iterator.rs index 7b4a40a0355..18e213b563d 100644 --- a/src/array/primitive/iterator.rs +++ b/src/array/primitive/iterator.rs @@ -16,9 +16,9 @@ impl IntoIterator for PrimitiveArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); - let null_count = validity.as_ref().map(|x| x.unset_bits()); - let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity, null_count) + let validity = + validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter())); + ZipValidity::new(values, validity) } } @@ -39,9 +39,6 @@ impl<'a, T: NativeType> MutablePrimitiveArray { ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index c27e98395fb..a64cd90f38a 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -141,11 +141,7 @@ impl PrimitiveArray { /// Returns an iterator over the values and validity, `Option<&T>`. #[inline] pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter, BitmapIter> { - ZipValidity::new( - self.values().iter(), - self.validity().as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values().iter(), self.validity()) } /// Returns an iterator of the values, `&T`, ignoring the arrays' validity. diff --git a/src/array/struct_/iterator.rs b/src/array/struct_/iterator.rs index 884cc300126..2b6849d2e26 100644 --- a/src/array/struct_/iterator.rs +++ b/src/array/struct_/iterator.rs @@ -89,13 +89,7 @@ impl<'a> IntoIterator for &'a StructArray { impl<'a> StructArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a> { - ZipValidity::new( - StructValueIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(StructValueIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 84ab1b941f0..5e057e90a5b 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -133,11 +133,7 @@ impl Utf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter, BitmapIter> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity()) } /// Returns an iterator of `&str` diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index c993ad18f8b..4cc83da9011 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -206,11 +206,7 @@ impl MutableUtf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter, BitmapIter> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) } /// Converts itself into an [`Array`]. diff --git a/src/bitmap/utils/zip_validity.rs b/src/bitmap/utils/zip_validity.rs index 201cb071ae2..d43802333a3 100644 --- a/src/bitmap/utils/zip_validity.rs +++ b/src/bitmap/utils/zip_validity.rs @@ -1,3 +1,5 @@ +use crate::bitmap::utils::BitmapIter; +use crate::bitmap::Bitmap; use crate::trusted_len::TrustedLen; /// An [`Iterator`] over validity and values. @@ -101,12 +103,24 @@ where V: Iterator, { /// Returns a new [`ZipValidity`] - pub fn new(values: I, validity: Option, null_count: Option) -> Self { + pub fn new(values: I, validity: Option) -> Self { match validity { - // only if we have a validity and there are nulls we will iterate them - Some(validity) if null_count != Some(0) => { - Self::Optional(ZipValidityIter::new(values, validity)) - } + Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)), + _ => Self::Required(values), + } + } +} + +impl<'a, T, I> ZipValidity> +where + I: Iterator, +{ + /// Returns a new [`ZipValidity`] and drops the `validity` if all values + /// are valid. + pub fn new_with_validity(values: I, validity: Option<&'a Bitmap>) -> Self { + // only if the validity has nulls we take the optional branch. + match validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.iter())) { + Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)), _ => Self::Required(values), } } diff --git a/src/io/avro/write/serialize.rs b/src/io/avro/write/serialize.rs index 30e31adaed1..cb94b78d28b 100644 --- a/src/io/avro/write/serialize.rs +++ b/src/io/avro/write/serialize.rs @@ -126,11 +126,7 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray, schema: &AvroSchema) -> .offsets() .windows(2) .map(|w| (w[1] - w[0]).to_usize() as i64); - let lengths = ZipValidity::new( - lengths, - array.validity().as_ref().map(|x| x.iter()), - array.validity().as_ref().map(|x| x.unset_bits()), - ); + let lengths = ZipValidity::new_with_validity(lengths, array.validity()); Box::new(BufStreamingIterator::new( lengths, @@ -184,11 +180,7 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer .map(|(x, schema)| new_serializer(x.as_ref(), schema)) .collect::>(); - let iterator = ZipValidity::new( - 0..array.len(), - array.validity().as_ref().map(|x| x.iter()), - array.validity().as_ref().map(|x| x.unset_bits()), - ); + let iterator = ZipValidity::new_with_validity(0..array.len(), array.validity()); Box::new(BufStreamingIterator::new( iterator, diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 288c23f235b..6b8d93e81f7 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -103,11 +103,7 @@ fn struct_serializer<'a>( let names = array.fields().iter().map(|f| f.name.as_str()); Box::new(BufStreamingIterator::new( - ZipValidity::new( - 0..array.len(), - array.validity().map(|x| x.iter()), - array.validity().map(|x| x.unset_bits()), - ), + ZipValidity::new_with_validity(0..array.len(), array.validity()), move |maybe, buf| { if maybe.is_some() { let names = names.clone(); diff --git a/tests/it/bitmap/utils/zip_validity.rs b/tests/it/bitmap/utils/zip_validity.rs index c86240eb0c2..9c6187f60a6 100644 --- a/tests/it/bitmap/utils/zip_validity.rs +++ b/tests/it/bitmap/utils/zip_validity.rs @@ -8,7 +8,7 @@ fn basic() { let a = Bitmap::from([true, false]); let a = Some(a.iter()); let values = vec![0, 1]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), None]); @@ -19,7 +19,7 @@ fn complete() { let a = Bitmap::from([true, false, true, false, true, false, true, false]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -31,7 +31,6 @@ fn complete() { #[test] fn slices() { let a = Bitmap::from([true, false]); - let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let offsets = vec![0, 2, 3]; let values = vec![1, 2, 3]; @@ -40,7 +39,7 @@ fn slices() { let end = x[1]; &values[start..end] }); - let zip = ZipValidity::new(iter, a, null_count); + let zip = ZipValidity::new(iter, a); let a = zip.collect::>(); assert_eq!(a, vec![Some([1, 2].as_ref()), None]); @@ -51,7 +50,7 @@ fn byte() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7, 8]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -75,7 +74,7 @@ fn offset() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -87,7 +86,7 @@ fn offset() { #[test] fn none() { let values = vec![0, 1, 2]; - let zip = ZipValidity::new(values.into_iter(), None::, None); + let zip = ZipValidity::new(values.into_iter(), None::); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), Some(1), Some(2)]); @@ -96,10 +95,9 @@ fn none() { #[test] fn rev() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); - let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, null_count); + let zip = ZipValidity::new(values.into_iter(), a); let result = zip.rev().collect::>(); let expected = vec![None, Some(1), None, None, Some(4), Some(5), None, Some(7)]