Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added TryExtendFromSelf
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Oct 21, 2022
1 parent 96ec770 commit 902078f
Show file tree
Hide file tree
Showing 22 changed files with 364 additions and 42 deletions.
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Expand Up @@ -114,7 +114,7 @@ impl<O: Offset> BinaryArray<O> {
/// Creates a new [`BinaryArray`] from a slice of optional `&[u8]`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
MutableBinaryArray::<O>::from(slice).into()
}

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
Expand Down
41 changes: 37 additions & 4 deletions src/array/binary/mutable.rs
@@ -1,21 +1,24 @@
use std::{iter::FromIterator, sync::Arc};

use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
bitmap::{Bitmap, MutableBitmap},
array::{Array, MutableArray, Offset, TryExtend, TryExtendFromSelf, TryPush},
bitmap::{
utils::{BitmapIter, ZipValidity},
Bitmap, MutableBitmap,
},
datatypes::DataType,
error::{Error, Result},
trusted_len::TrustedLen,
};

use super::{BinaryArray, MutableBinaryValuesArray};
use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
use crate::array::physical_binary::*;

/// The Arrow's equivalent to `Vec<Option<Vec<u8>>>`.
/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableBinaryArray<O: Offset> {
values: MutableBinaryValuesArray<O>,
validity: Option<MutableBitmap>,
Expand Down Expand Up @@ -96,6 +99,12 @@ impl<O: Offset> MutableBinaryArray<O> {
Self { values, validity }
}

/// Builds a [`MutableBinaryArray`] out of a slice whose items are optional byte slices.
///
/// Each `Option<T>` is borrowed as `Option<&T>` and the resulting iterator is
/// handed to `from_trusted_len_iter`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
    let items = slice.as_ref();
    let borrowed = items.iter().map(|item| item.as_ref());
    Self::from_trusted_len_iter(borrowed)
}

/// Returns the default [`DataType`] of this array by delegating to
/// `BinaryArray::<O>::default_data_type`.
fn default_data_type() -> DataType {
BinaryArray::<O>::default_data_type()
}
Expand Down Expand Up @@ -193,6 +202,16 @@ impl<O: Offset> MutableBinaryArray<O> {
pub fn offsets(&self) -> &Vec<O> {
// Forward to the inner values array, which owns the offsets.
self.values.offsets()
}

/// Returns an iterator of `Option<&[u8]>` that zips the values with the
/// validity bitmap, when one is present.
pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<O>, BitmapIter> {
    let validity = self.validity.as_ref().map(|bitmap| bitmap.iter());
    ZipValidity::new(self.values_iter(), validity)
}

/// Returns an iterator over the values of this array.
///
/// The validity is not consulted here; the iterator comes straight from the
/// inner values array.
pub fn values_iter(&self) -> MutableBinaryValuesIter<O> {
self.values.iter()
}
}

impl<O: Offset> MutableArray for MutableBinaryArray<O> {
Expand Down Expand Up @@ -478,3 +497,17 @@ impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
Ok(())
}
}

impl<O: Offset> PartialEq for MutableBinaryArray<O> {
    /// Two arrays are equal when their `iter()` sequences are equal
    /// element by element.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.iter();
        let rhs = other.iter();
        lhs.eq(rhs)
    }
}

impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
    /// Appends the values and the validity of `other` to `self`.
    ///
    /// # Errors
    /// Returns the error produced while extending the inner values. In that
    /// case the validity of `self` has not been touched.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // The validity step needs the length *before* any value is appended.
        let len = self.len();

        // Run the fallible step first: if it fails we return early and the
        // validity is not left longer than the values.
        self.values.try_extend_from_self(&other.values)?;

        extend_validity(len, &mut self.validity, &other.validity);
        Ok(())
    }
}
10 changes: 9 additions & 1 deletion src/array/binary/mutable_values.rs
Expand Up @@ -3,7 +3,8 @@ use std::{iter::FromIterator, sync::Arc};
use crate::{
array::{
specification::{check_offsets_minimal, try_check_offsets},
Array, ArrayAccessor, ArrayValuesIter, MutableArray, Offset, TryExtend, TryPush,
Array, ArrayAccessor, ArrayValuesIter, MutableArray, Offset, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::DataType,
Expand Down Expand Up @@ -408,3 +409,10 @@ unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
self.len()
}
}

impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
    /// Appends the offsets and the bytes of `other` to `self`.
    ///
    /// # Errors
    /// Returns the error produced by `try_extend_offsets`. The bytes of
    /// `other` are only copied once the offsets were extended successfully.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // Extend the offsets first: this is the only fallible step, so on error
        // no bytes of `other` have been appended to `self.values` yet.
        try_extend_offsets(&mut self.offsets, &other.offsets)?;

        self.values.extend_from_slice(&other.values);
        Ok(())
    }
}
17 changes: 15 additions & 2 deletions src/array/boolean/mutable.rs
Expand Up @@ -2,7 +2,10 @@ use std::iter::FromIterator;
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{
physical_binary::extend_validity, Array, MutableArray, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, PhysicalType},
error::Result,
Expand All @@ -15,7 +18,7 @@ use super::BooleanArray;
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableBooleanArray {
data_type: DataType,
values: MutableBitmap,
Expand Down Expand Up @@ -533,3 +536,13 @@ impl PartialEq for MutableBooleanArray {
self.iter().eq(other.iter())
}
}

impl TryExtendFromSelf for MutableBooleanArray {
    /// Appends the validity and the boolean values of `other` to `self`.
    /// Always returns `Ok(())`.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        let current_len = self.len();
        extend_validity(current_len, &mut self.validity, &other.validity);

        // Copy every bit of `other`, starting at bit 0.
        let bit_count = other.values.len();
        self.values
            .extend_from_slice(other.values.as_slice(), 0, bit_count);
        Ok(())
    }
}
20 changes: 2 additions & 18 deletions src/array/fixed_size_binary/mod.rs
@@ -1,9 +1,4 @@
use crate::{
bitmap::{Bitmap, MutableBitmap},
buffer::Buffer,
datatypes::DataType,
error::Error,
};
use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType, error::Error};

use super::Array;

Expand Down Expand Up @@ -321,18 +316,7 @@ impl FixedSizeBinaryArray {
/// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
let values = slice
.as_ref()
.iter()
.copied()
.flat_map(|x| x.unwrap_or([0; N]))
.collect::<Vec<_>>();
let validity = slice
.as_ref()
.iter()
.map(|x| x.is_some())
.collect::<MutableBitmap>();
Self::new(DataType::FixedSizeBinary(N), values.into(), validity.into())
MutableFixedSizeBinaryArray::from(slice).into()
}
}

Expand Down
31 changes: 29 additions & 2 deletions src/array/fixed_size_binary/mutable.rs
@@ -1,7 +1,7 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray},
array::{physical_binary::extend_validity, Array, MutableArray, TryExtendFromSelf},
bitmap::MutableBitmap,
datatypes::DataType,
error::{Error, Result},
Expand All @@ -13,7 +13,7 @@ use super::{FixedSizeBinaryArray, FixedSizeBinaryValues};
/// Converting a [`MutableFixedSizeBinaryArray`] into a [`FixedSizeBinaryArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableFixedSizeBinaryArray {
data_type: DataType,
size: usize,
Expand Down Expand Up @@ -73,6 +73,23 @@ impl MutableFixedSizeBinaryArray {
)
}

/// Builds a [`MutableFixedSizeBinaryArray`] of width `N` out of a slice of optional `[u8; N]`.
///
/// A `None` item contributes `N` zero bytes to the values and an unset bit to
/// the validity; a `Some` item contributes its bytes and a set bit.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
    let items = slice.as_ref();

    let mut values: Vec<u8> = Vec::new();
    for item in items.iter().copied() {
        // Nulls are stored as a zero-filled slot.
        values.extend_from_slice(&item.unwrap_or([0; N]));
    }

    let validity: MutableBitmap = items.iter().map(Option::is_some).collect();

    Self::from_data(DataType::FixedSizeBinary(N), values, validity.into())
}

/// tries to push a new entry to [`MutableFixedSizeBinaryArray`].
/// # Error
/// Errors iff the size of `value` is not equal to its own size.
Expand Down Expand Up @@ -278,3 +295,13 @@ impl PartialEq for MutableFixedSizeBinaryArray {
self.iter().eq(other.iter())
}
}

impl TryExtendFromSelf for MutableFixedSizeBinaryArray {
    /// Appends the validity and the raw bytes of `other` to `self`.
    /// Always returns `Ok(())`.
    // NOTE(review): nothing here compares `self.size` with `other.size` —
    // confirm that callers only combine arrays of the same width.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        let current_len = self.len();
        extend_validity(current_len, &mut self.validity, &other.validity);

        self.values.extend_from_slice(other.values.as_slice());
        Ok(())
    }
}
23 changes: 21 additions & 2 deletions src/array/fixed_size_list/mutable.rs
@@ -1,7 +1,10 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{
physical_binary::extend_validity, Array, MutableArray, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, Field},
error::{Error, Result},
Expand All @@ -10,7 +13,7 @@ use crate::{
use super::FixedSizeListArray;

/// The mutable version of [`FixedSizeListArray`].
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableFixedSizeListArray<M: MutableArray> {
data_type: DataType,
size: usize,
Expand Down Expand Up @@ -59,6 +62,11 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}
}

/// The length of this array: the length of the inner values divided by `size`.
///
/// # Panics
/// Panics if `size` is zero, because of the integer division.
pub fn len(&self) -> usize {
self.values.len() / self.size
}

/// The inner values
pub fn values(&self) -> &M {
&self.values
Expand Down Expand Up @@ -205,3 +213,14 @@ where
Ok(())
}
}

impl<M> TryExtendFromSelf for MutableFixedSizeListArray<M>
where
    M: MutableArray + TryExtendFromSelf,
{
    /// Appends the inner values and the validity of `other` to `self`.
    ///
    /// # Errors
    /// Returns the error produced while extending the inner values. In that
    /// case the validity of `self` has not been touched.
    // NOTE(review): nothing here compares `self.size` with `other.size` —
    // confirm that callers only combine arrays of the same width.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // `len()` is derived from the inner values, so read it before they grow.
        let len = self.len();

        // Run the fallible step first so an error cannot leave the validity
        // longer than the values.
        self.values.try_extend_from_self(&other.values)?;

        extend_validity(len, &mut self.validity, &other.validity);
        Ok(())
    }
}
24 changes: 24 additions & 0 deletions src/array/list/iterator.rs
Expand Up @@ -46,3 +46,27 @@ impl<'a, O: Offset> ListArray<O> {
ListValuesIter::new(self)
}
}

/// Walks a list of end offsets and, for each one, yields the window of
/// `values` that lies between the previous offset and that offset.
struct Iter<T, I: Iterator<Item = Option<T>>> {
    /// Offset at which the next window starts.
    current: i32,
    /// End offsets still to be visited.
    offsets: std::vec::IntoIter<i32>,
    /// Values iterator; it is cloned for every window that is produced.
    values: I,
}

impl<T, I: Iterator<Item = Option<T>> + Clone> Iterator for Iter<T, I> {
    type Item = Option<std::iter::Take<std::iter::Skip<I>>>;

    /// Yields `Some(window)` for every remaining offset and `None` once the
    /// offsets are exhausted. The inner option is always `Some`.
    fn next(&mut self) -> Option<Self::Item> {
        let end = self.offsets.next()?;
        let start = self.current;
        let length = end - start;

        // Each window is a fresh clone of the values, skipped to `start`.
        let window = self
            .values
            .clone()
            .skip(start as usize)
            .take(length as usize);

        self.current = end;
        Some(Some(window))
    }
}
29 changes: 27 additions & 2 deletions src/array/list/mutable.rs
@@ -1,7 +1,10 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
array::{
physical_binary::{extend_validity, try_extend_offsets},
Array, MutableArray, Offset, TryExtend, TryExtendFromSelf, TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, Field},
error::{Error, Result},
Expand All @@ -10,7 +13,7 @@ use crate::{
use super::ListArray;

/// The mutable version of [`ListArray`].
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableListArray<O: Offset, M: MutableArray> {
data_type: DataType,
offsets: Vec<O>,
Expand Down Expand Up @@ -70,6 +73,8 @@ where
I: IntoIterator<Item = Option<T>>,
{
fn try_extend<II: IntoIterator<Item = Option<I>>>(&mut self, iter: II) -> Result<()> {
let iter = iter.into_iter();
self.reserve(iter.size_hint().0);
for items in iter {
self.try_push(items)?;
}
Expand All @@ -96,6 +101,20 @@ where
}
}

impl<O, M> TryExtendFromSelf for MutableListArray<O, M>
where
    O: Offset,
    M: MutableArray + TryExtendFromSelf,
{
    /// Appends the inner values, the offsets and the validity of `other` to `self`.
    ///
    /// # Errors
    /// Returns the error produced while extending the inner values or the
    /// offsets. In either case the validity of `self` has not been touched.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // `len()` is derived from the offsets, so read it before they grow.
        let len = self.len();

        // Fallible steps first. Values go before offsets so that a failure in
        // the offsets step cannot leave offsets pointing past the values.
        self.values.try_extend_from_self(&other.values)?;
        try_extend_offsets(&mut self.offsets, &other.offsets)?;

        // Validity last: it is only extended once both steps above succeeded.
        extend_validity(len, &mut self.validity, &other.validity);
        Ok(())
    }
}

impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
/// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity.
pub fn new_from(values: M, data_type: DataType, capacity: usize) -> Self {
Expand Down Expand Up @@ -152,6 +171,12 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}
}

/// Returns the length of this array, computed as the number of offsets minus one.
// NOTE(review): assumes `offsets` always holds at least one entry; otherwise
// the subtraction underflows — confirm the constructors guarantee it.
#[inline]
pub fn len(&self) -> usize {
self.offsets.len() - 1
}

/// The values
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
Expand Down
7 changes: 7 additions & 0 deletions src/array/mod.rs
Expand Up @@ -417,6 +417,13 @@ pub trait TryPush<A> {
fn try_push(&mut self, item: A) -> Result<()>;
}

/// A trait describing the ability of a struct to extend itself from a reference to
/// another instance of the same type.
/// Specialization of [`TryExtend`].
pub trait TryExtendFromSelf {
/// Tries to extend itself with elements from `other`, failing only on overflow.
///
/// `other` is only borrowed, so it is left unchanged by the call.
fn try_extend_from_self(&mut self, other: &Self) -> Result<()>;
}

/// Trait that [`BinaryArray`] and [`Utf8Array`] implement for the purposes of DRY.
/// # Safety
/// The implementer must ensure that
Expand Down

0 comments on commit 902078f

Please sign in to comment.