Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split out arrow-array crate (#2594) #2769

Merged
merged 6 commits into from Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -18,6 +18,7 @@
[workspace]
members = [
"arrow",
"arrow-array",
"arrow-data",
"arrow-schema",
"arrow-buffer",
Expand Down
59 changes: 59 additions & 0 deletions arrow-array/Cargo.toml
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "arrow-array"
version = "23.0.0"
description = "Array abstractions for Apache Arrow"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

[lib]
name = "arrow_array"
path = "src/lib.rs"
bench = false


[target.'cfg(target_arch = "wasm32")'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

[dependencies]
arrow-buffer = { version = "23.0.0", path = "../arrow-buffer" }
arrow-schema = { version = "23.0.0", path = "../arrow-schema" }
arrow-data = { version = "23.0.0", path = "../arrow-data" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
num = { version = "0.4", default-features = false, features = ["std"] }
half = { version = "2.0", default-features = false }
hashbrown = { version = "0.12", default-features = false }

[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }

[build-dependencies]
Expand Up @@ -15,18 +15,13 @@
// specific language governing permissions and limitations
// under the License.

use std::convert::From;
use std::fmt;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericBinaryIter,
GenericListArray, OffsetSizeTrait,
};
use crate::array::array::ArrayAccessor;
use crate::buffer::Buffer;
use crate::util::bit_util;
use crate::{buffer::MutableBuffer, datatypes::DataType};
use crate::iterator::GenericBinaryIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor, GenericListArray, OffsetSizeTrait};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;

/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
/// binary data.
Expand Down Expand Up @@ -239,13 +234,13 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
}

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let prefix = OffsetSize::PREFIX;

write!(f, "{}BinaryArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
std::fmt::Debug::fmt(&array.value(index), f)
})?;
write!(f, "]")
}
Expand Down Expand Up @@ -387,7 +382,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
/// Create a BinaryArray from a vector of byte slices.
///
/// ```
/// use arrow::array::{Array, BinaryArray};
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<&[u8]> =
/// vec![b"one", b"two", b"", b"three"];
/// let array = BinaryArray::from_vec(values);
Expand All @@ -401,7 +396,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
/// Create a BinaryArray from a vector of Optional (null) byte slices.
///
/// ```
/// use arrow::array::{Array, BinaryArray};
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<Option<&[u8]>> =
/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = BinaryArray::from_opt_vec(values);
Expand All @@ -427,7 +422,7 @@ pub type BinaryArray = GenericBinaryArray<i32>;
/// Create a LargeBinaryArray from a vector of byte slices.
///
/// ```
/// use arrow::array::{Array, LargeBinaryArray};
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<&[u8]> =
/// vec![b"one", b"two", b"", b"three"];
/// let array = LargeBinaryArray::from_vec(values);
Expand All @@ -441,7 +436,7 @@ pub type BinaryArray = GenericBinaryArray<i32>;
/// Create a LargeBinaryArray from a vector of Optional (null) byte slices.
///
/// ```
/// use arrow::array::{Array, LargeBinaryArray};
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<Option<&[u8]>> =
/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = LargeBinaryArray::from_opt_vec(values);
Expand All @@ -462,7 +457,8 @@ pub type LargeBinaryArray = GenericBinaryArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::{array::ListArray, datatypes::Field};
use crate::ListArray;
use arrow_schema::Field;

#[test]
fn test_binary_array() {
Expand Down
Expand Up @@ -15,23 +15,21 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::array::ArrayAccessor;
use std::borrow::Borrow;
use std::convert::From;
use std::iter::{FromIterator, IntoIterator};
use std::{any::Any, fmt};

use super::*;
use super::{array::print_long_array, raw_pointer::RawPtrBox};
use crate::buffer::{Buffer, MutableBuffer};
use crate::util::bit_util;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;

/// Array of bools
///
/// # Example
///
/// ```
/// use arrow::array::{Array, BooleanArray};
/// use arrow_array::{Array, BooleanArray};
/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
/// assert_eq!(4, arr.len());
/// assert_eq!(1, arr.null_count());
Expand All @@ -50,7 +48,7 @@ use crate::util::bit_util;
///
/// Using `from_iter`
/// ```
/// use arrow::array::{Array, BooleanArray};
/// use arrow_array::{Array, BooleanArray};
/// let v = vec![Some(false), Some(true), Some(false), Some(true)];
/// let arr = v.into_iter().collect::<BooleanArray>();
/// assert_eq!(4, arr.len());
Expand All @@ -72,11 +70,11 @@ pub struct BooleanArray {
raw_values: RawPtrBox<u8>,
}

impl fmt::Debug for BooleanArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl std::fmt::Debug for BooleanArray {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "BooleanArray\n[\n")?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
std::fmt::Debug::fmt(&array.value(index), f)
})?;
write!(f, "]")
}
Expand Down Expand Up @@ -238,7 +236,7 @@ impl<'a> BooleanArray {
}
}

impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
let iter = iter.into_iter();
let (_, data_len) = iter.size_hint();
Expand Down Expand Up @@ -279,9 +277,6 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
mod tests {
use super::*;

use crate::buffer::Buffer;
use crate::datatypes::DataType;

#[test]
fn test_boolean_fmt_debug() {
let arr = BooleanArray::from(vec![true, false, false]);
Expand Down
Expand Up @@ -15,34 +15,31 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::ArrayAccessor;
use std::convert::From;
use std::fmt;
use std::marker::PhantomData;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
use crate::builder::BooleanBufferBuilder;
use crate::decimal::{Decimal, Decimal256};
use crate::iterator::DecimalIter;
use crate::raw_pointer::RawPtrBox;
use crate::types::{Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType};
use crate::{
print_long_array, Array, ArrayAccessor, FixedSizeBinaryArray, FixedSizeListArray,
};
use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray};
#[allow(deprecated)]
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{
validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type,
Decimal256Type, DecimalType, NativeDecimalType,
use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::decimal::{
validate_decimal256_precision_with_lt_bytes, validate_decimal_precision,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{Decimal, Decimal256};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::marker::PhantomData;

/// `Decimal128Array` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, DecimalArray, Decimal128Array};
/// use arrow::datatypes::DataType;
/// use arrow_array::{Array, DecimalArray, Decimal128Array};
/// use arrow_schema::DataType;
///
/// // Create a DecimalArray with the default precision and scale
/// let decimal_array: Decimal128Array = vec![
Expand Down Expand Up @@ -268,7 +265,11 @@ impl<T: DecimalType> DecimalArray<T> {
/// 1. `precision` is larger than [`Self::MAX_PRECISION`]
/// 2. `scale` is larger than [`Self::MAX_SCALE`];
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result<Self>
pub fn with_precision_and_scale(
self,
precision: u8,
scale: u8,
) -> Result<Self, ArrowError>
where
Self: Sized,
{
Expand All @@ -292,7 +293,11 @@ impl<T: DecimalType> DecimalArray<T> {
}

// validate that the new precision and scale are valid
fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> {
fn validate_precision_scale(
&self,
precision: u8,
scale: u8,
) -> Result<(), ArrowError> {
if precision > Self::MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
Expand Down Expand Up @@ -320,7 +325,7 @@ impl<T: DecimalType> DecimalArray<T> {
}

// validate that all the data in the array is valid within the new precision
fn validate_data(&self, precision: u8) -> Result<()> {
fn validate_data(&self, precision: u8) -> Result<(), ArrowError> {
// TODO: Move into DecimalType
match Self::VALUE_LENGTH {
16 => self
Expand Down Expand Up @@ -361,7 +366,7 @@ impl Decimal128Array {

// Validates that decimal128 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let decimal = unsafe { self.value_unchecked(idx) };
Expand All @@ -376,7 +381,7 @@ impl Decimal128Array {
impl Decimal256Array {
// Validates that decimal256 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let raw_val = unsafe {
Expand Down Expand Up @@ -504,8 +509,8 @@ impl<T: DecimalType> From<DecimalArray<T>> for ArrayData {
}
}

impl<T: DecimalType> fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl<T: DecimalType> std::fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"Decimal{}Array<{}, {}>\n[\n",
Expand Down Expand Up @@ -552,13 +557,12 @@ impl<'a, T: DecimalType> DecimalArray<T> {

#[cfg(test)]
mod tests {
use crate::array::Decimal256Builder;
use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use crate::util::decimal::Decimal128;
use crate::{array::Decimal128Builder, datatypes::Field};
use num::{BigInt, Num};

use super::*;
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::decimal::Decimal128;
use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use arrow_schema::Field;
use num::{BigInt, Num};

#[test]
fn test_decimal_array() {
Expand Down