Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support for JSON ser/de records layout #1275

Merged
merged 21 commits into from Oct 30, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/array/binary/mutable.rs
@@ -1,7 +1,9 @@
use std::{iter::FromIterator, sync::Arc};

use crate::{
array::{specification::check_offsets, Array, MutableArray, Offset, TryExtend, TryPush},
array::{
specification::check_offsets, Array, Container, MutableArray, Offset, TryExtend, TryPush,
},
bitmap::MutableBitmap,
datatypes::DataType,
error::{Error, Result},
Expand Down Expand Up @@ -186,6 +188,12 @@ impl<O: Offset> MutableBinaryArray<O> {
}
}

/// Lets a [`MutableBinaryArray`] be constructed generically with preallocated space.
impl<O: Offset> Container for MutableBinaryArray<O> {
    /// Forwards `capacity` unchanged to `MutableBinaryArray::with_capacity`.
    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}

impl<O: Offset> MutableArray for MutableBinaryArray<O> {
fn len(&self) -> usize {
self.offsets.len() - 1
Expand Down
8 changes: 7 additions & 1 deletion src/array/boolean/mutable.rs
Expand Up @@ -2,7 +2,7 @@ use std::iter::FromIterator;
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{Array, Container, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
datatypes::{DataType, PhysicalType},
error::Result,
Expand Down Expand Up @@ -453,6 +453,12 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for MutableBoolea
}
}

/// Lets a [`MutableBooleanArray`] be constructed generically with preallocated space.
impl Container for MutableBooleanArray {
    /// Forwards `capacity` unchanged to `MutableBooleanArray::with_capacity`.
    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}

impl MutableArray for MutableBooleanArray {
fn len(&self) -> usize {
self.values.len()
Expand Down
8 changes: 7 additions & 1 deletion src/array/fixed_size_binary/mutable.rs
@@ -1,7 +1,7 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray},
array::{Array, Container, MutableArray},
bitmap::MutableBitmap,
datatypes::DataType,
error::{Error, Result},
Expand Down Expand Up @@ -211,6 +211,12 @@ impl MutableFixedSizeBinaryArray {
}
}

/// Lets a [`MutableFixedSizeBinaryArray`] be constructed through the generic
/// [`Container`] interface.
// NOTE(review): the two-argument `MutableFixedSizeBinaryArray::with_capacity` is not
// visible in this view — confirm which parameter is the element size and which is
// the capacity, and that `capacity` is passed in the right position.
// NOTE(review): if the literal `0` ends up as the element size, `len()` on this type
// (which computes `self.values.len() / self.size`) would divide by zero — TODO confirm.
impl Container for MutableFixedSizeBinaryArray {
/// Creates the array by calling the inherent two-argument constructor with
/// `capacity` and a hard-coded `0`.
fn with_capacity(capacity: usize) -> Self {
MutableFixedSizeBinaryArray::with_capacity(capacity, 0)
}
}

impl MutableArray for MutableFixedSizeBinaryArray {
fn len(&self) -> usize {
self.values.len() / self.size
Expand Down
5 changes: 5 additions & 0 deletions src/array/fixed_size_list/mutable.rs
Expand Up @@ -59,6 +59,11 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}
}

/// Returns the size (number of elements per slot) of this [`MutableFixedSizeListArray`].
///
/// This is a plain read of the stored `size` field; nothing is computed.
pub const fn size(&self) -> usize {
self.size
}

/// The inner values
pub fn values(&self) -> &M {
&self.values
Expand Down
87 changes: 85 additions & 2 deletions src/array/list/mutable.rs
@@ -1,10 +1,14 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
array::{
specification::try_check_offsets, Array, Container, MutableArray, Offset, TryExtend,
TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, Field},
error::{Error, Result},
trusted_len::TrustedLen,
};

use super::ListArray;
Expand Down Expand Up @@ -152,6 +156,75 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}
}

/// Expand this array, using elements from the underlying backing array.
/// Assumes the expansion begins at the highest previous offset, or zero if
/// this [MutableListArray] is currently empty.
///
/// Panics if:
/// - the new offsets are not in monotonic increasing order.
/// - any new offset is not in bounds of the backing array.
/// - the passed iterator has no upper bound.
pub fn extend_offsets<II>(&mut self, expansion: II)
AnIrishDuck marked this conversation as resolved.
Show resolved Hide resolved
where
II: IntoIterator<Item = Option<O>> + TrustedLen,
AnIrishDuck marked this conversation as resolved.
Show resolved Hide resolved
{
let current_len = self.offsets.len();
let (_, upper) = expansion.size_hint();
let upper = upper.expect("iterator must have upper bound");
if current_len == 0 && upper > 0 {
self.offsets.push(O::zero());
}
// safety: checked below
unsafe { self.unsafe_extend_offsets(expansion) };
if self.offsets.len() > current_len {
// check all inserted offsets
try_check_offsets(&self.offsets[current_len..], self.values.len())
.expect("invalid offsets");
}
// else expansion is empty, and this is trivially safe.
}

/// Expand this array, using elements from the underlying backing array.
/// Assumes the expansion begins at the highest previous offset, or zero if
/// this [MutableListArray] is currently empty.
///
/// # Safety
///
/// Assumes that `offsets` are in order, and do not overrun the underlying
/// `values` backing array.
///
/// Also assumes the expansion begins at the highest previous offset, or
/// zero if the array is currently empty.
///
/// Panics if the passed iterator has no upper bound.
pub unsafe fn unsafe_extend_offsets<II>(&mut self, expansion: II)
AnIrishDuck marked this conversation as resolved.
Show resolved Hide resolved
where
II: IntoIterator<Item = Option<O>> + TrustedLen,
AnIrishDuck marked this conversation as resolved.
Show resolved Hide resolved
{
let (_, upper) = expansion.size_hint();
let upper = upper.expect("iterator must have upper bound");
let final_size = self.len() + upper;
self.offsets.reserve(upper);

for item in expansion {
match item {
Some(offset) => {
self.offsets.push(offset);
if let Some(validity) = &mut self.validity {
validity.push(true);
}
}
None => self.push_null(),
}

if let Some(validity) = &mut self.validity {
if validity.capacity() < final_size {
validity.reserve(final_size - validity.capacity());
}
}
}
}

/// The values
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
Expand Down Expand Up @@ -209,11 +282,21 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
validity.shrink_to_fit()
}
}

/// Returns the number of offsets minus one.
// NOTE(review): this subtraction underflows if `offsets` is ever empty — confirm
// that the constructors always seed `offsets` with an initial element.
fn len(&self) -> usize {
self.offsets.len() - 1
}
}

/// Lets a [`MutableListArray`] be constructed generically with preallocated space.
impl<O: Offset, M: MutableArray + Default + 'static> Container for MutableListArray<O, M> {
    /// Forwards `capacity` unchanged to `MutableListArray::with_capacity`.
    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}

impl<O: Offset, M: MutableArray + 'static> MutableArray for MutableListArray<O, M> {
fn len(&self) -> usize {
self.offsets.len() - 1
MutableListArray::len(self)
}

fn validity(&self) -> Option<&MutableBitmap> {
Expand Down
53 changes: 53 additions & 0 deletions src/array/mod.rs
Expand Up @@ -17,6 +17,7 @@
//! Most arrays contain a [`MutableArray`] counterpart that is neither clonable nor slicable, but
//! can be operated in-place.
use std::any::Any;
use std::sync::Arc;

use crate::error::Result;
use crate::{
Expand Down Expand Up @@ -113,6 +114,15 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {

dyn_clone::clone_trait_object!(Array);

/// An array whose backing store can be preallocated to a requested size at
/// construction time.
pub trait Container {
    /// Builds a new instance of the implementing type with the given `capacity`.
    ///
    /// Only callable on sized implementors.
    fn with_capacity(capacity: usize) -> Self where Self: Sized;
}

/// A trait describing a mutable array; i.e. an array whose values can be changed.
/// Mutable arrays cannot be cloned but can be mutated in place,
/// thereby making them useful to perform numeric operations without allocations.
Expand Down Expand Up @@ -170,6 +180,49 @@ pub trait MutableArray: std::fmt::Debug + Send + Sync {
fn shrink_to_fit(&mut self);
}

impl MutableArray for Box<dyn MutableArray> {
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
fn len(&self) -> usize {
self.as_ref().len()
}

fn validity(&self) -> Option<&MutableBitmap> {
self.as_ref().validity()
}

fn as_box(&mut self) -> Box<dyn Array> {
self.as_mut().as_box()
}

fn as_arc(&mut self) -> Arc<dyn Array> {
self.as_mut().as_arc()
}

fn data_type(&self) -> &DataType {
self.as_ref().data_type()
}

fn as_any(&self) -> &dyn std::any::Any {
self.as_ref().as_any()
}

fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self.as_mut().as_mut_any()
}

#[inline]
fn push_null(&mut self) {
self.as_mut().push_null()
}

fn shrink_to_fit(&mut self) {
self.as_mut().shrink_to_fit();
}

fn reserve(&mut self, additional: usize) {
self.as_mut().reserve(additional);
}
}

macro_rules! general_dyn {
($array:expr, $ty:ty, $f:expr) => {{
let array = $array.as_any().downcast_ref::<$ty>().unwrap();
Expand Down
8 changes: 7 additions & 1 deletion src/array/primitive/mutable.rs
Expand Up @@ -2,7 +2,7 @@ use std::{iter::FromIterator, sync::Arc};

use crate::bitmap::Bitmap;
use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{Array, Container, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
datatypes::DataType,
error::{Error, Result},
Expand Down Expand Up @@ -378,6 +378,12 @@ impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {
}
}

/// Lets a [`MutablePrimitiveArray`] be constructed generically with preallocated space.
impl<T: NativeType> Container for MutablePrimitiveArray<T> {
    /// Forwards `capacity` unchanged to `MutablePrimitiveArray::with_capacity`.
    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}

impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {
fn len(&self) -> usize {
self.values.len()
Expand Down
8 changes: 7 additions & 1 deletion src/array/utf8/mutable.rs
Expand Up @@ -2,7 +2,7 @@ use std::{iter::FromIterator, sync::Arc};

use crate::array::physical_binary::*;
use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
array::{Array, Container, MutableArray, Offset, TryExtend, TryPush},
bitmap::{
utils::{BitmapIter, ZipValidity},
Bitmap, MutableBitmap,
Expand Down Expand Up @@ -247,6 +247,12 @@ impl<O: Offset> MutableUtf8Array<O> {
}
}

/// Lets a [`MutableUtf8Array`] be constructed generically with preallocated space.
impl<O: Offset> Container for MutableUtf8Array<O> {
    /// Forwards `capacity` unchanged to `MutableUtf8Array::with_capacity`.
    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}

impl<O: Offset> MutableArray for MutableUtf8Array<O> {
fn len(&self) -> usize {
self.len()
Expand Down