Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved ZipValidity iterators
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 29, 2022
1 parent e106cff commit 03e981e
Show file tree
Hide file tree
Showing 17 changed files with 85 additions and 19 deletions.
7 changes: 6 additions & 1 deletion src/array/binary/mod.rs
Expand Up @@ -117,7 +117,12 @@ impl<O: Offset> BinaryArray<O> {

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
///
/// The validity's `unset_bits()` value is passed to [`ZipValidity::new`] next to the
/// validity iterator; that constructor only zips the validity in when the count is
/// not `Some(0)`.
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
// NOTE(review): rendered diff without +/- markers. The two-argument call directly
// below is the single line this commit deletes; the statements after it replace it.
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
// `None` when there is no validity bitmap, otherwise `Some(unset_bits())`.
let null_count = self.validity.as_ref().map(|validity| validity.unset_bits());
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
null_count,
)
}

/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
Expand Down
8 changes: 7 additions & 1 deletion src/array/boolean/iterator.rs
Expand Up @@ -22,8 +22,9 @@ impl IntoIterator for BooleanArray {
fn into_iter(self) -> Self::IntoIter {
// Takes the array apart by value; the first component of the tuple is discarded.
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
// Read the count from the bitmap before `validity` is shadowed by its iterator.
let null_count = validity.as_ref().map(|validity| validity.unset_bits());
let validity = validity.map(|x| x.into_iter());
// NOTE(review): rendered diff without +/- markers. The two-argument call is the
// line this commit deletes; the three-argument call after it is the replacement.
ZipValidity::new(values, validity)
ZipValidity::new(values, validity, null_count)
}
}

Expand All @@ -41,9 +42,14 @@ impl<'a> MutableBooleanArray {
/// Iterates over this [`MutableBooleanArray`], yielding every slot as an optional `bool`.
///
/// The validity iterator and the validity's `unset_bits()` value are both forwarded to
/// [`ZipValidity::new`]; each is `None` when the array carries no validity.
#[inline]
pub fn iter(&'a self) -> ZipValidity<bool, BitmapIter<'a>, BitmapIter<'a>> {
    let validity_iter = self.validity().as_ref().map(|bitmap| bitmap.iter());
    let nulls = self.validity().as_ref().map(|bitmap| bitmap.unset_bits());
    ZipValidity::new(self.values().iter(), validity_iter, nulls)
}

Expand Down
3 changes: 3 additions & 0 deletions src/array/boolean/mod.rs
Expand Up @@ -91,6 +91,9 @@ impl BooleanArray {
ZipValidity::new(
self.values().iter(),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
4 changes: 4 additions & 0 deletions src/array/dictionary/mod.rs
Expand Up @@ -194,6 +194,10 @@ impl<K: DictionaryKey> DictionaryArray<K> {
ZipValidity::new(
DictionaryValuesIter::new(self),
self.keys.validity().as_ref().map(|x| x.iter()),
self.keys
.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
9 changes: 8 additions & 1 deletion src/array/fixed_size_binary/iterator.rs
Expand Up @@ -19,7 +19,11 @@ impl<'a> FixedSizeBinaryArray {
pub fn iter(
&'a self,
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
// NOTE(review): rendered diff without +/- markers. The two-argument call directly
// below is the line this commit deletes; the three-argument call replaces it.
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
// `None` without a validity bitmap, otherwise `Some(unset_bits())`; a value of
// `Some(0)` makes `ZipValidity::new` ignore the validity iterator.
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
}

/// Returns iterator over the values of [`FixedSizeBinaryArray`]
Expand All @@ -45,6 +49,9 @@ impl<'a> MutableFixedSizeBinaryArray {
ZipValidity::new(
self.iter_values(),
self.validity().as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
3 changes: 3 additions & 0 deletions src/array/fixed_size_list/iterator.rs
Expand Up @@ -39,6 +39,9 @@ impl<'a> FixedSizeListArray {
ZipValidity::new(
FixedSizeListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
3 changes: 3 additions & 0 deletions src/array/list/iterator.rs
Expand Up @@ -38,6 +38,9 @@ impl<'a, O: Offset> ListArray<O> {
ZipValidity::new(
ListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
1 change: 1 addition & 0 deletions src/array/map/iterator.rs
Expand Up @@ -75,6 +75,7 @@ impl<'a> MapArray {
ZipValidity::new(
MapValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|x| x.unset_bits()),
)
}

Expand Down
6 changes: 5 additions & 1 deletion src/array/primitive/iterator.rs
Expand Up @@ -16,8 +16,9 @@ impl<T: NativeType> IntoIterator for PrimitiveArray<T> {
fn into_iter(self) -> Self::IntoIter {
// Takes the array apart by value; the first component of the tuple is discarded.
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
// Read the count from the bitmap before `validity` is shadowed by its iterator.
let null_count = validity.as_ref().map(|x| x.unset_bits());
let validity = validity.map(|x| x.into_iter());
// NOTE(review): rendered diff without +/- markers. The two-argument call is the
// line this commit deletes; the three-argument call after it is the replacement.
ZipValidity::new(values, validity)
ZipValidity::new(values, validity, null_count)
}
}

Expand All @@ -38,6 +39,9 @@ impl<'a, T: NativeType> MutablePrimitiveArray<T> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
1 change: 1 addition & 0 deletions src/array/primitive/mod.rs
Expand Up @@ -144,6 +144,7 @@ impl<T: NativeType> PrimitiveArray<T> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
}

Expand Down
3 changes: 3 additions & 0 deletions src/array/struct_/iterator.rs
Expand Up @@ -92,6 +92,9 @@ impl<'a> StructArray {
ZipValidity::new(
StructValueIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
6 changes: 5 additions & 1 deletion src/array/utf8/mod.rs
Expand Up @@ -133,7 +133,11 @@ impl<O: Offset> Utf8Array<O> {

/// Returns an iterator of `Option<&str>`
///
/// The validity's `unset_bits()` value is passed to [`ZipValidity::new`] as the third
/// argument; that constructor only zips the validity in when the count is not `Some(0)`.
pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter<O>, BitmapIter> {
// NOTE(review): rendered diff without +/- markers. The two-argument call directly
// below is the line this commit deletes; the three-argument call replaces it.
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
// `None` without a validity bitmap, otherwise `Some(unset_bits())`.
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
}

/// Returns an iterator of `&str`
Expand Down
6 changes: 5 additions & 1 deletion src/array/utf8/mutable.rs
Expand Up @@ -206,7 +206,11 @@ impl<O: Offset> MutableUtf8Array<O> {

/// Returns an iterator of `Option<&str>`
///
/// The validity's `unset_bits()` value is passed to [`ZipValidity::new`] as the third
/// argument; that constructor only zips the validity in when the count is not `Some(0)`.
pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>, BitmapIter> {
// NOTE(review): rendered diff without +/- markers. The two-argument call directly
// below is the line this commit deletes; the three-argument call replaces it.
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
// `None` without a validity bitmap, otherwise `Some(unset_bits())`.
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
}

/// Converts itself into an [`Array`].
Expand Down
9 changes: 6 additions & 3 deletions src/bitmap/utils/zip_validity.rs
Expand Up @@ -101,10 +101,13 @@ where
V: Iterator<Item = bool>,
{
/// Returns a new [`ZipValidity`]
///
/// * `values` - iterator over the values.
/// * `validity` - optional iterator of validity flags paired with `values`.
/// * `null_count` - optional count of nulls for `validity`. `Some(0)` makes the
///   constructor discard `validity` and return `Self::Required`; any other value,
///   including `None`, keeps a present `validity` and returns `Self::Optional`.
// NOTE(review): rendered diff without +/- markers. The two-parameter signature and
// the first two match arms are the three lines this commit deletes; the
// three-parameter signature and the guarded arms after them are the replacements.
pub fn new(values: I, validity: Option<V>) -> Self {
pub fn new(values: I, validity: Option<V>, null_count: Option<usize>) -> Self {
match validity {
// Pre-change arms: a present validity was always zipped in.
Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)),
None => Self::Required(values),
// only if we have a validity and there are nulls we will iterate them
// (`None != Some(0)` holds, so an unknown count still takes this arm)
Some(validity) if null_count != Some(0) => {
Self::Optional(ZipValidityIter::new(values, validity))
}
// No validity at all, or a validity whose null count is exactly zero.
_ => Self::Required(values),
}
}
}
Expand Down
12 changes: 10 additions & 2 deletions src/io/avro/write/serialize.rs
Expand Up @@ -126,7 +126,11 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray<O>, schema: &AvroSchema) ->
.offsets()
.windows(2)
.map(|w| (w[1] - w[0]).to_usize() as i64);
let lengths = ZipValidity::new(lengths, array.validity().as_ref().map(|x| x.iter()));
let lengths = ZipValidity::new(
lengths,
array.validity().as_ref().map(|x| x.iter()),
array.validity().as_ref().map(|x| x.unset_bits()),
);

Box::new(BufStreamingIterator::new(
lengths,
Expand Down Expand Up @@ -180,7 +184,11 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer
.map(|(x, schema)| new_serializer(x.as_ref(), schema))
.collect::<Vec<_>>();

let iterator = ZipValidity::new(0..array.len(), array.validity().as_ref().map(|x| x.iter()));
let iterator = ZipValidity::new(
0..array.len(),
array.validity().as_ref().map(|x| x.iter()),
array.validity().as_ref().map(|x| x.unset_bits()),
);

Box::new(BufStreamingIterator::new(
iterator,
Expand Down
7 changes: 6 additions & 1 deletion src/io/json/write/serialize.rs
Expand Up @@ -103,7 +103,11 @@ fn struct_serializer<'a>(
let names = array.fields().iter().map(|f| f.name.as_str());

Box::new(BufStreamingIterator::new(
ZipValidity::new(0..array.len(), array.validity().map(|x| x.iter())),
ZipValidity::new(
0..array.len(),
array.validity().map(|x| x.iter()),
array.validity().map(|x| x.unset_bits()),
),
move |maybe, buf| {
if maybe.is_some() {
let names = names.clone();
Expand Down Expand Up @@ -143,6 +147,7 @@ fn list_serializer<'a, O: Offset>(
ZipValidity::new(
array.offsets().windows(2),
array.validity().map(|x| x.iter()),
array.validity().map(|x| x.unset_bits()),
),
move |offset, buf| {
if let Some(offset) = offset {
Expand Down
16 changes: 9 additions & 7 deletions tests/it/bitmap/utils/zip_validity.rs
Expand Up @@ -8,7 +8,7 @@ fn basic() {
let a = Bitmap::from([true, false]);
let a = Some(a.iter());
let values = vec![0, 1];
let zip = ZipValidity::new(values.into_iter(), a);
let zip = ZipValidity::new(values.into_iter(), a, None);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some(0), None]);
Expand All @@ -19,7 +19,7 @@ fn complete() {
let a = Bitmap::from([true, false, true, false, true, false, true, false]);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a);
let zip = ZipValidity::new(values.into_iter(), a, None);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -31,6 +31,7 @@ fn complete() {
#[test]
fn slices() {
let a = Bitmap::from([true, false]);
let null_count = Some(a.unset_bits());
let a = Some(a.iter());
let offsets = vec![0, 2, 3];
let values = vec![1, 2, 3];
Expand All @@ -39,7 +40,7 @@ fn slices() {
let end = x[1];
&values[start..end]
});
let zip = ZipValidity::new(iter, a);
let zip = ZipValidity::new(iter, a, null_count);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some([1, 2].as_ref()), None]);
Expand All @@ -50,7 +51,7 @@ fn byte() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7, 8];
let zip = ZipValidity::new(values.into_iter(), a);
let zip = ZipValidity::new(values.into_iter(), a, None);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -74,7 +75,7 @@ fn offset() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a);
let zip = ZipValidity::new(values.into_iter(), a, None);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -86,7 +87,7 @@ fn offset() {
#[test]
fn none() {
let values = vec![0, 1, 2];
let zip = ZipValidity::new(values.into_iter(), None::<BitmapIter>);
let zip = ZipValidity::new(values.into_iter(), None::<BitmapIter>, None);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some(0), Some(1), Some(2)]);
Expand All @@ -95,9 +96,10 @@ fn none() {
#[test]
fn rev() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8);
let null_count = Some(a.unset_bits());
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a);
let zip = ZipValidity::new(values.into_iter(), a, null_count);

let result = zip.rev().collect::<Vec<_>>();
let expected = vec![None, Some(1), None, None, Some(4), Some(5), None, Some(7)]
Expand Down

0 comments on commit 03e981e

Please sign in to comment.