Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
add 'new_with_validity'
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 2, 2022
1 parent 03e981e commit e14cbe2
Show file tree
Hide file tree
Showing 17 changed files with 47 additions and 117 deletions.
7 changes: 1 addition & 6 deletions src/array/binary/mod.rs
Expand Up @@ -117,12 +117,7 @@ impl<O: Offset> BinaryArray<O> {

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
let null_count = self.validity.as_ref().map(|validity| validity.unset_bits());
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
null_count,
)
ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
}

/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
Expand Down
11 changes: 3 additions & 8 deletions src/array/boolean/iterator.rs
Expand Up @@ -22,9 +22,9 @@ impl IntoIterator for BooleanArray {
fn into_iter(self) -> Self::IntoIter {
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
let null_count = validity.as_ref().map(|validity| validity.unset_bits());
let validity = validity.map(|x| x.into_iter());
ZipValidity::new(values, validity, null_count)
let validity =
validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter()));
ZipValidity::new(values, validity)
}
}

Expand All @@ -42,14 +42,9 @@ impl<'a> MutableBooleanArray {
/// Returns an iterator over the optional values of this [`MutableBooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<bool, BitmapIter<'a>, BitmapIter<'a>> {
let null_count = self
.validity()
.as_ref()
.map(|validity| validity.unset_bits());
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
null_count,
)
}

Expand Down
8 changes: 1 addition & 7 deletions src/array/boolean/mod.rs
Expand Up @@ -88,13 +88,7 @@ impl BooleanArray {
/// Returns an iterator over the optional values of this [`BooleanArray`].
#[inline]
pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
ZipValidity::new(
self.values().iter(),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(self.values().iter(), self.validity())
}

/// Returns an iterator over the values of this [`BooleanArray`].
Expand Down
9 changes: 1 addition & 8 deletions src/array/dictionary/mod.rs
Expand Up @@ -191,14 +191,7 @@ impl<K: DictionaryKey> DictionaryArray<K> {
/// This function will allocate a new [`Scalar`] per item and is usually not performant.
/// Consider calling `keys_iter` and `values`, downcasting `values`, and iterating over that.
pub fn iter(&self) -> ZipValidity<Box<dyn Scalar>, DictionaryValuesIter<K>, BitmapIter> {
ZipValidity::new(
DictionaryValuesIter::new(self),
self.keys.validity().as_ref().map(|x| x.iter()),
self.keys
.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(DictionaryValuesIter::new(self), self.keys.validity())
}

/// Returns an iterator of [`Box<dyn Scalar>`]
Expand Down
14 changes: 2 additions & 12 deletions src/array/fixed_size_binary/iterator.rs
Expand Up @@ -19,11 +19,7 @@ impl<'a> FixedSizeBinaryArray {
pub fn iter(
&'a self,
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(self.values_iter(), self.validity())
}

/// Returns iterator over the values of [`FixedSizeBinaryArray`]
Expand All @@ -46,13 +42,7 @@ impl<'a> MutableFixedSizeBinaryArray {
pub fn iter(
&'a self,
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(
self.iter_values(),
self.validity().as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new(self.iter_values(), self.validity().map(|x| x.iter()))
}

/// Returns iterator over the values of [`MutableFixedSizeBinaryArray`]
Expand Down
8 changes: 1 addition & 7 deletions src/array/fixed_size_list/iterator.rs
Expand Up @@ -36,13 +36,7 @@ impl<'a> IntoIterator for &'a FixedSizeListArray {
impl<'a> FixedSizeListArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a> {
ZipValidity::new(
FixedSizeListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(FixedSizeListValuesIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
8 changes: 1 addition & 7 deletions src/array/list/iterator.rs
Expand Up @@ -35,13 +35,7 @@ impl<'a, O: Offset> IntoIterator for &'a ListArray<O> {
impl<'a, O: Offset> ListArray<O> {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a, O> {
ZipValidity::new(
ListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(ListValuesIter::new(self), self.validity.as_ref())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
6 changes: 1 addition & 5 deletions src/array/map/iterator.rs
Expand Up @@ -72,11 +72,7 @@ impl<'a> IntoIterator for &'a MapArray {
impl<'a> MapArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipValidity<Box<dyn Array>, MapValuesIter<'a>, BitmapIter<'a>> {
ZipValidity::new(
MapValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|x| x.unset_bits()),
)
ZipValidity::new_with_validity(MapValuesIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
9 changes: 3 additions & 6 deletions src/array/primitive/iterator.rs
Expand Up @@ -16,9 +16,9 @@ impl<T: NativeType> IntoIterator for PrimitiveArray<T> {
fn into_iter(self) -> Self::IntoIter {
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
let null_count = validity.as_ref().map(|x| x.unset_bits());
let validity = validity.map(|x| x.into_iter());
ZipValidity::new(values, validity, null_count)
let validity =
validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter()));
ZipValidity::new(values, validity)
}
}

Expand All @@ -39,9 +39,6 @@ impl<'a, T: NativeType> MutablePrimitiveArray<T> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
}

Expand Down
6 changes: 1 addition & 5 deletions src/array/primitive/mod.rs
Expand Up @@ -141,11 +141,7 @@ impl<T: NativeType> PrimitiveArray<T> {
/// Returns an iterator over the values and validity, `Option<&T>`.
#[inline]
pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<T>, BitmapIter> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(self.values().iter(), self.validity())
}

/// Returns an iterator of the values, `&T`, ignoring the arrays' validity.
Expand Down
8 changes: 1 addition & 7 deletions src/array/struct_/iterator.rs
Expand Up @@ -89,13 +89,7 @@ impl<'a> IntoIterator for &'a StructArray {
impl<'a> StructArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a> {
ZipValidity::new(
StructValueIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
self.validity()
.as_ref()
.map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(StructValueIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
6 changes: 1 addition & 5 deletions src/array/utf8/mod.rs
Expand Up @@ -133,11 +133,7 @@ impl<O: Offset> Utf8Array<O> {

/// Returns an iterator of `Option<&str>`
pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter<O>, BitmapIter> {
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
ZipValidity::new_with_validity(self.values_iter(), self.validity())
}

/// Returns an iterator of `&str`
Expand Down
6 changes: 1 addition & 5 deletions src/array/utf8/mutable.rs
Expand Up @@ -206,11 +206,7 @@ impl<O: Offset> MutableUtf8Array<O> {

/// Returns an iterator of `Option<&str>`
pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>, BitmapIter> {
ZipValidity::new(
self.values_iter(),
self.validity.as_ref().map(|x| x.iter()),
self.validity.as_ref().map(|validity| validity.unset_bits()),
)
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
}

/// Converts itself into an [`Array`].
Expand Down
24 changes: 19 additions & 5 deletions src/bitmap/utils/zip_validity.rs
@@ -1,3 +1,5 @@
use crate::bitmap::utils::BitmapIter;
use crate::bitmap::Bitmap;
use crate::trusted_len::TrustedLen;

/// An [`Iterator`] over validity and values.
Expand Down Expand Up @@ -101,12 +103,24 @@ where
V: Iterator<Item = bool>,
{
/// Returns a new [`ZipValidity`]
pub fn new(values: I, validity: Option<V>, null_count: Option<usize>) -> Self {
pub fn new(values: I, validity: Option<V>) -> Self {
match validity {
// only if we have a validity and there are nulls we will iterate them
Some(validity) if null_count != Some(0) => {
Self::Optional(ZipValidityIter::new(values, validity))
}
Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)),
_ => Self::Required(values),
}
}
}

impl<'a, T, I> ZipValidity<T, I, BitmapIter<'a>>
where
I: Iterator<Item = T>,
{
/// Builds a [`ZipValidity`] from `values` and an optional validity [`Bitmap`].
///
/// The bitmap is only zipped with the values when it reports at least one
/// unset bit; a missing bitmap, or one without unset bits, yields the
/// `Required` variant that carries `values` alone.
pub fn new_with_validity(values: I, validity: Option<&'a Bitmap>) -> Self {
    // Keep the bitmap only when it actually has unset bits (i.e. nulls).
    let nullable = validity.filter(|bitmap| bitmap.unset_bits() > 0);
    if let Some(bitmap) = nullable {
        Self::Optional(ZipValidityIter::new(values, bitmap.iter()))
    } else {
        Self::Required(values)
    }
}
Expand Down
12 changes: 2 additions & 10 deletions src/io/avro/write/serialize.rs
Expand Up @@ -126,11 +126,7 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray<O>, schema: &AvroSchema) ->
.offsets()
.windows(2)
.map(|w| (w[1] - w[0]).to_usize() as i64);
let lengths = ZipValidity::new(
lengths,
array.validity().as_ref().map(|x| x.iter()),
array.validity().as_ref().map(|x| x.unset_bits()),
);
let lengths = ZipValidity::new_with_validity(lengths, array.validity());

Box::new(BufStreamingIterator::new(
lengths,
Expand Down Expand Up @@ -184,11 +180,7 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer
.map(|(x, schema)| new_serializer(x.as_ref(), schema))
.collect::<Vec<_>>();

let iterator = ZipValidity::new(
0..array.len(),
array.validity().as_ref().map(|x| x.iter()),
array.validity().as_ref().map(|x| x.unset_bits()),
);
let iterator = ZipValidity::new_with_validity(0..array.len(), array.validity());

Box::new(BufStreamingIterator::new(
iterator,
Expand Down
6 changes: 1 addition & 5 deletions src/io/json/write/serialize.rs
Expand Up @@ -103,11 +103,7 @@ fn struct_serializer<'a>(
let names = array.fields().iter().map(|f| f.name.as_str());

Box::new(BufStreamingIterator::new(
ZipValidity::new(
0..array.len(),
array.validity().map(|x| x.iter()),
array.validity().map(|x| x.unset_bits()),
),
ZipValidity::new_with_validity(0..array.len(), array.validity()),
move |maybe, buf| {
if maybe.is_some() {
let names = names.clone();
Expand Down
16 changes: 7 additions & 9 deletions tests/it/bitmap/utils/zip_validity.rs
Expand Up @@ -8,7 +8,7 @@ fn basic() {
let a = Bitmap::from([true, false]);
let a = Some(a.iter());
let values = vec![0, 1];
let zip = ZipValidity::new(values.into_iter(), a, None);
let zip = ZipValidity::new(values.into_iter(), a);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some(0), None]);
Expand All @@ -19,7 +19,7 @@ fn complete() {
let a = Bitmap::from([true, false, true, false, true, false, true, false]);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a, None);
let zip = ZipValidity::new(values.into_iter(), a);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -31,7 +31,6 @@ fn complete() {
#[test]
fn slices() {
let a = Bitmap::from([true, false]);
let null_count = Some(a.unset_bits());
let a = Some(a.iter());
let offsets = vec![0, 2, 3];
let values = vec![1, 2, 3];
Expand All @@ -40,7 +39,7 @@ fn slices() {
let end = x[1];
&values[start..end]
});
let zip = ZipValidity::new(iter, a, null_count);
let zip = ZipValidity::new(iter, a);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some([1, 2].as_ref()), None]);
Expand All @@ -51,7 +50,7 @@ fn byte() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7, 8];
let zip = ZipValidity::new(values.into_iter(), a, None);
let zip = ZipValidity::new(values.into_iter(), a);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -75,7 +74,7 @@ fn offset() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8);
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a, None);
let zip = ZipValidity::new(values.into_iter(), a);

let a = zip.collect::<Vec<_>>();
assert_eq!(
Expand All @@ -87,7 +86,7 @@ fn offset() {
#[test]
fn none() {
let values = vec![0, 1, 2];
let zip = ZipValidity::new(values.into_iter(), None::<BitmapIter>, None);
let zip = ZipValidity::new(values.into_iter(), None::<BitmapIter>);

let a = zip.collect::<Vec<_>>();
assert_eq!(a, vec![Some(0), Some(1), Some(2)]);
Expand All @@ -96,10 +95,9 @@ fn none() {
#[test]
fn rev() {
let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8);
let null_count = Some(a.unset_bits());
let a = Some(a.iter());
let values = vec![0, 1, 2, 3, 4, 5, 6, 7];
let zip = ZipValidity::new(values.into_iter(), a, null_count);
let zip = ZipValidity::new(values.into_iter(), a);

let result = zip.rev().collect::<Vec<_>>();
let expected = vec![None, Some(1), None, None, Some(4), Some(5), None, Some(7)]
Expand Down

0 comments on commit e14cbe2

Please sign in to comment.