From 731124f03ffd995d4e4534b874e9963762c43752 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 22 Sep 2022 14:09:25 +0100 Subject: [PATCH 1/6] Split out arrow-array --- Cargo.toml | 1 + arrow-array/Cargo.toml | 59 ++ .../src/array/binary_array.rs | 36 +- .../src/array/boolean_array.rs | 33 +- .../src/array/decimal_array.rs | 68 +- .../src/array/dictionary_array.rs | 206 +++-- .../src/array/fixed_size_binary_array.rs | 46 +- .../src/array/fixed_size_list_array.rs | 29 +- .../src/array/list_array.rs | 58 +- .../src/array/map_array.rs | 33 +- .../array.rs => arrow-array/src/array/mod.rs | 98 ++- .../src/array/null_array.rs | 17 +- .../src/array/primitive_array.rs | 303 ++++--- .../src/array/string_array.rs | 38 +- .../src/array/struct_array.rs | 41 +- .../src/array/union_array.rs | 44 +- .../src}/builder/boolean_buffer_builder.rs | 10 +- .../src}/builder/boolean_builder.rs | 67 +- .../src}/builder/buffer_builder.rs | 115 ++- .../src}/builder/decimal_builder.rs | 43 +- .../src}/builder/fixed_size_binary_builder.rs | 20 +- .../src}/builder/fixed_size_list_builder.rs | 20 +- .../src}/builder/generic_binary_builder.rs | 10 +- .../src}/builder/generic_list_builder.rs | 21 +- .../src}/builder/generic_string_builder.rs | 8 +- .../src}/builder/map_builder.rs | 25 +- .../array => arrow-array/src}/builder/mod.rs | 55 +- .../src}/builder/null_buffer_builder.rs | 5 +- .../src}/builder/primitive_builder.rs | 43 +- .../builder/primitive_dictionary_builder.rs | 27 +- .../src}/builder/string_dictionary_builder.rs | 35 +- .../src}/builder/struct_builder.rs | 24 +- .../src}/builder/union_builder.rs | 41 +- arrow-array/src/cast.rs | 767 ++++++++++++++++++ .../src/util => arrow-array/src}/decimal.rs | 28 +- .../datatypes => arrow-array/src}/delta.rs | 0 .../src/array => arrow-array/src}/iterator.rs | 42 +- arrow-array/src/lib.rs | 209 +++++ .../array => arrow-array/src}/raw_pointer.rs | 0 {arrow => arrow-array}/src/record_batch.rs | 110 +-- .../src/temporal_conversions.rs 
| 76 +- .../util => arrow-array/src}/trusted_len.rs | 6 +- .../datatypes => arrow-array/src}/types.rs | 7 +- arrow-schema/src/schema.rs | 3 + arrow/Cargo.toml | 1 + arrow/src/array/cast.rs | 761 ----------------- arrow/src/array/mod.rs | 474 +---------- arrow/src/compute/kernels/cast.rs | 5 +- arrow/src/compute/kernels/filter.rs | 8 +- arrow/src/compute/kernels/temporal.rs | 2 +- arrow/src/csv/reader.rs | 7 +- arrow/src/datatypes/mod.rs | 13 +- arrow/src/datatypes/native.rs | 23 +- arrow/src/json/reader.rs | 7 +- arrow/src/lib.rs | 30 +- arrow/src/util/mod.rs | 5 +- 56 files changed, 2104 insertions(+), 2159 deletions(-) create mode 100644 arrow-array/Cargo.toml rename arrow/src/array/array_binary.rs => arrow-array/src/array/binary_array.rs (97%) rename arrow/src/array/array_boolean.rs => arrow-array/src/array/boolean_array.rs (94%) rename arrow/src/array/array_decimal.rs => arrow-array/src/array/decimal_array.rs (95%) rename arrow/src/array/array_dictionary.rs => arrow-array/src/array/dictionary_array.rs (81%) rename arrow/src/array/array_fixed_size_binary.rs => arrow-array/src/array/fixed_size_binary_array.rs (95%) rename arrow/src/array/array_fixed_size_list.rs => arrow-array/src/array/fixed_size_list_array.rs (95%) rename arrow/src/array/array_list.rs => arrow-array/src/array/list_array.rs (96%) rename arrow/src/array/array_map.rs => arrow-array/src/array/map_array.rs (96%) rename arrow/src/array/array.rs => arrow-array/src/array/mod.rs (94%) rename arrow/src/array/null.rs => arrow-array/src/array/null_array.rs (93%) rename arrow/src/array/array_primitive.rs => arrow-array/src/array/primitive_array.rs (81%) rename arrow/src/array/array_string.rs => arrow-array/src/array/string_array.rs (97%) rename arrow/src/array/array_struct.rs => arrow-array/src/array/struct_array.rs (95%) rename arrow/src/array/array_union.rs => arrow-array/src/array/union_array.rs (97%) rename {arrow/src/array => arrow-array/src}/builder/boolean_buffer_builder.rs (98%) rename 
{arrow/src/array => arrow-array/src}/builder/boolean_builder.rs (85%) rename {arrow/src/array => arrow-array/src}/builder/buffer_builder.rs (75%) rename {arrow/src/array => arrow-array/src}/builder/decimal_builder.rs (94%) rename {arrow/src/array => arrow-array/src}/builder/fixed_size_binary_builder.rs (94%) rename {arrow/src/array => arrow-array/src}/builder/fixed_size_list_builder.rs (95%) rename {arrow/src/array => arrow-array/src}/builder/generic_binary_builder.rs (97%) rename {arrow/src/array => arrow-array/src}/builder/generic_list_builder.rs (96%) rename {arrow/src/array => arrow-array/src}/builder/generic_string_builder.rs (96%) rename {arrow/src/array => arrow-array/src}/builder/map_builder.rs (93%) rename {arrow/src/array => arrow-array/src}/builder/mod.rs (73%) rename {arrow/src/array => arrow-array/src}/builder/null_buffer_builder.rs (98%) rename {arrow/src/array => arrow-array/src}/builder/primitive_builder.rs (84%) rename {arrow/src/array => arrow-array/src}/builder/primitive_dictionary_builder.rs (92%) rename {arrow/src/array => arrow-array/src}/builder/string_dictionary_builder.rs (94%) rename {arrow/src/array => arrow-array/src}/builder/struct_builder.rs (97%) rename {arrow/src/array => arrow-array/src}/builder/union_builder.rs (92%) create mode 100644 arrow-array/src/cast.rs rename {arrow/src/util => arrow-array/src}/decimal.rs (96%) rename {arrow/src/datatypes => arrow-array/src}/delta.rs (100%) rename {arrow/src/array => arrow-array/src}/iterator.rs (88%) create mode 100644 arrow-array/src/lib.rs rename {arrow/src/array => arrow-array/src}/raw_pointer.rs (100%) rename {arrow => arrow-array}/src/record_batch.rs (91%) rename {arrow => arrow-array}/src/temporal_conversions.rs (71%) rename {arrow/src/util => arrow-array/src}/trusted_len.rs (96%) rename {arrow/src/datatypes => arrow-array/src}/types.rs (99%) delete mode 100644 arrow/src/array/cast.rs diff --git a/Cargo.toml b/Cargo.toml index 270d23f26c9..28517265b3c 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -18,6 +18,7 @@ [workspace] members = [ "arrow", + "arrow-array", "arrow-data", "arrow-schema", "arrow-buffer", diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml new file mode 100644 index 00000000000..8e66bf3b763 --- /dev/null +++ b/arrow-array/Cargo.toml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "arrow-array" +version = "23.0.0" +description = "Array abstractions for Apache Arrow" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_array" +path = "src/lib.rs" +bench = false + + +[target.'cfg(target_arch = "wasm32")'.dependencies] +ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } + +[dependencies] +arrow-buffer = { version = "23.0.0", path = "../arrow-buffer" } +arrow-schema = { version = "23.0.0", path = "../arrow-schema" } +arrow-data = { version = "23.0.0", path = "../arrow-data" } +chrono = { version = "0.4", default-features = false, features = ["clock"] } +num = { version = "0.4", default-features = false, features = ["std"] } +half = { version = "2.0", default-features = false } +hashbrown = { version = "0.12", default-features = false } + +[dev-dependencies] +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } + +[build-dependencies] diff --git a/arrow/src/array/array_binary.rs b/arrow-array/src/array/binary_array.rs similarity index 97% rename from arrow/src/array/array_binary.rs rename to arrow-array/src/array/binary_array.rs index 1c63e8e24b2..cb168daf072 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow-array/src/array/binary_array.rs @@ -15,18 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-use std::convert::From; -use std::fmt; -use std::{any::Any, iter::FromIterator}; - -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericBinaryIter, - GenericListArray, OffsetSizeTrait, -}; -use crate::array::array::ArrayAccessor; -use crate::buffer::Buffer; -use crate::util::bit_util; -use crate::{buffer::MutableBuffer, datatypes::DataType}; +use crate::iterator::GenericBinaryIter; +use crate::raw_pointer::RawPtrBox; +use crate::{print_long_array, Array, ArrayAccessor, GenericListArray, OffsetSizeTrait}; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::DataType; +use std::any::Any; /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing /// binary data. @@ -239,13 +234,13 @@ impl GenericBinaryArray { } } -impl fmt::Debug for GenericBinaryArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for GenericBinaryArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let prefix = OffsetSize::PREFIX; write!(f, "{}BinaryArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -387,7 +382,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray { /// Create a BinaryArray from a vector of byte slices. /// /// ``` -/// use arrow::array::{Array, BinaryArray}; +/// use arrow_array::{Array, BinaryArray}; /// let values: Vec<&[u8]> = /// vec![b"one", b"two", b"", b"three"]; /// let array = BinaryArray::from_vec(values); @@ -401,7 +396,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray { /// Create a BinaryArray from a vector of Optional (null) byte slices. 
/// /// ``` -/// use arrow::array::{Array, BinaryArray}; +/// use arrow_array::{Array, BinaryArray}; /// let values: Vec> = /// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; /// let array = BinaryArray::from_opt_vec(values); @@ -427,7 +422,7 @@ pub type BinaryArray = GenericBinaryArray; /// Create a LargeBinaryArray from a vector of byte slices. /// /// ``` -/// use arrow::array::{Array, LargeBinaryArray}; +/// use arrow_array::{Array, LargeBinaryArray}; /// let values: Vec<&[u8]> = /// vec![b"one", b"two", b"", b"three"]; /// let array = LargeBinaryArray::from_vec(values); @@ -441,7 +436,7 @@ pub type BinaryArray = GenericBinaryArray; /// Create a LargeBinaryArray from a vector of Optional (null) byte slices. /// /// ``` -/// use arrow::array::{Array, LargeBinaryArray}; +/// use arrow_array::{Array, LargeBinaryArray}; /// let values: Vec> = /// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; /// let array = LargeBinaryArray::from_opt_vec(values); @@ -462,7 +457,8 @@ pub type LargeBinaryArray = GenericBinaryArray; #[cfg(test)] mod tests { use super::*; - use crate::{array::ListArray, datatypes::Field}; + use crate::ListArray; + use arrow_schema::Field; #[test] fn test_binary_array() { diff --git a/arrow/src/array/array_boolean.rs b/arrow-array/src/array/boolean_array.rs similarity index 94% rename from arrow/src/array/array_boolean.rs rename to arrow-array/src/array/boolean_array.rs index 7ea18ea6203..24be122c933 100644 --- a/arrow/src/array/array_boolean.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -15,23 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-use crate::array::array::ArrayAccessor; -use std::borrow::Borrow; -use std::convert::From; -use std::iter::{FromIterator, IntoIterator}; -use std::{any::Any, fmt}; - -use super::*; -use super::{array::print_long_array, raw_pointer::RawPtrBox}; -use crate::buffer::{Buffer, MutableBuffer}; -use crate::util::bit_util; +use crate::builder::BooleanBuilder; +use crate::iterator::BooleanIter; +use crate::raw_pointer::RawPtrBox; +use crate::{print_long_array, Array, ArrayAccessor}; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::DataType; +use std::any::Any; /// Array of bools /// /// # Example /// /// ``` -/// use arrow::array::{Array, BooleanArray}; +/// use arrow_array::{Array, BooleanArray}; /// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); /// assert_eq!(4, arr.len()); /// assert_eq!(1, arr.null_count()); @@ -50,7 +48,7 @@ use crate::util::bit_util; /// /// Using `from_iter` /// ``` -/// use arrow::array::{Array, BooleanArray}; +/// use arrow_array::{Array, BooleanArray}; /// let v = vec![Some(false), Some(true), Some(false), Some(true)]; /// let arr = v.into_iter().collect::(); /// assert_eq!(4, arr.len()); @@ -72,11 +70,11 @@ pub struct BooleanArray { raw_values: RawPtrBox, } -impl fmt::Debug for BooleanArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for BooleanArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "BooleanArray\n[\n")?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -238,7 +236,7 @@ impl<'a> BooleanArray { } } -impl>> FromIterator for BooleanArray { +impl>> FromIterator for BooleanArray { fn from_iter>(iter: I) -> Self { let iter = iter.into_iter(); let (_, data_len) = iter.size_hint(); @@ -279,9 +277,6 @@ impl>> FromIterator for BooleanArray { mod tests { use super::*; - use 
crate::buffer::Buffer; - use crate::datatypes::DataType; - #[test] fn test_boolean_fmt_debug() { let arr = BooleanArray::from(vec![true, false, false]); diff --git a/arrow/src/array/array_decimal.rs b/arrow-array/src/array/decimal_array.rs similarity index 95% rename from arrow/src/array/array_decimal.rs rename to arrow-array/src/array/decimal_array.rs index f6a2dda2da5..34b424092e4 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow-array/src/array/decimal_array.rs @@ -15,25 +15,22 @@ // specific language governing permissions and limitations // under the License. -use crate::array::ArrayAccessor; -use std::convert::From; -use std::fmt; -use std::marker::PhantomData; -use std::{any::Any, iter::FromIterator}; - -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray, +use crate::builder::BooleanBufferBuilder; +use crate::decimal::{Decimal, Decimal256}; +use crate::iterator::DecimalIter; +use crate::raw_pointer::RawPtrBox; +use crate::types::{Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType}; +use crate::{ + print_long_array, Array, ArrayAccessor, FixedSizeBinaryArray, FixedSizeListArray, }; -use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray}; -#[allow(deprecated)] -use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::validate_decimal_precision; -use crate::datatypes::{ - validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type, - Decimal256Type, DecimalType, NativeDecimalType, +use arrow_buffer::{Buffer, MutableBuffer}; +use arrow_data::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, }; -use crate::error::{ArrowError, Result}; -use crate::util::decimal::{Decimal, Decimal256}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; +use std::any::Any; +use std::marker::PhantomData; /// `Decimal128Array` stores fixed width decimal numbers, /// with a fixed precision and scale. 
@@ -41,8 +38,8 @@ use crate::util::decimal::{Decimal, Decimal256}; /// # Examples /// /// ``` -/// use arrow::array::{Array, DecimalArray, Decimal128Array}; -/// use arrow::datatypes::DataType; +/// use arrow_array::{Array, DecimalArray, Decimal128Array}; +/// use arrow_schema::DataType; /// /// // Create a DecimalArray with the default precision and scale /// let decimal_array: Decimal128Array = vec![ @@ -268,7 +265,11 @@ impl DecimalArray { /// 1. `precision` is larger than [`Self::MAX_PRECISION`] /// 2. `scale` is larger than [`Self::MAX_SCALE`]; /// 3. `scale` is > `precision` - pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result + pub fn with_precision_and_scale( + self, + precision: u8, + scale: u8, + ) -> Result where Self: Sized, { @@ -292,7 +293,11 @@ impl DecimalArray { } // validate that the new precision and scale are valid or not - fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> { + fn validate_precision_scale( + &self, + precision: u8, + scale: u8, + ) -> Result<(), ArrowError> { if precision > Self::MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "precision {} is greater than max {}", @@ -320,7 +325,7 @@ impl DecimalArray { } // validate all the data in the array are valid within the new precision or not - fn validate_data(&self, precision: u8) -> Result<()> { + fn validate_data(&self, precision: u8) -> Result<(), ArrowError> { // TODO: Move into DecimalType match Self::VALUE_LENGTH { 16 => self @@ -361,7 +366,7 @@ impl Decimal128Array { // Validates decimal128 values in this array can be properly interpreted // with the specified precision. 
- fn validate_decimal_precision(&self, precision: u8) -> Result<()> { + fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { (0..self.len()).try_for_each(|idx| { if self.is_valid(idx) { let decimal = unsafe { self.value_unchecked(idx) }; @@ -376,7 +381,7 @@ impl Decimal128Array { impl Decimal256Array { // Validates decimal256 values in this array can be properly interpreted // with the specified precision. - fn validate_decimal_precision(&self, precision: u8) -> Result<()> { + fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { (0..self.len()).try_for_each(|idx| { if self.is_valid(idx) { let raw_val = unsafe { @@ -504,8 +509,8 @@ impl From> for ArrayData { } } -impl fmt::Debug for DecimalArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for DecimalArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, "Decimal{}Array<{}, {}>\n[\n", @@ -552,13 +557,12 @@ impl<'a, T: DecimalType> DecimalArray { #[cfg(test)] mod tests { - use crate::array::Decimal256Builder; - use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; - use crate::util::decimal::Decimal128; - use crate::{array::Decimal128Builder, datatypes::Field}; - use num::{BigInt, Num}; - use super::*; + use crate::builder::{Decimal128Builder, Decimal256Builder}; + use crate::decimal::Decimal128; + use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; + use arrow_schema::Field; + use num::{BigInt, Num}; #[test] fn test_decimal_array() { diff --git a/arrow/src/array/array_dictionary.rs b/arrow-array/src/array/dictionary_array.rs similarity index 81% rename from arrow/src/array/array_dictionary.rs rename to arrow-array/src/array/dictionary_array.rs index acdb427a22a..35d243fde9a 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -15,20 +15,130 @@ // specific language governing permissions and limitations // 
under the License. -use crate::array::{ArrayAccessor, ArrayIter}; +use crate::builder::StringDictionaryBuilder; +use crate::iterator::ArrayIter; +use crate::types::*; +use crate::{ + make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, PrimitiveArray, + StringArray, +}; +use arrow_buffer::ArrowNativeType; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; -use std::fmt; -use std::iter::IntoIterator; -use std::{convert::From, iter::FromIterator}; -use super::{ - make_array, Array, ArrayData, ArrayRef, PrimitiveArray, StringArray, - StringDictionaryBuilder, -}; -use crate::datatypes::{ - ArrowDictionaryKeyType, ArrowNativeType, ArrowPrimitiveType, DataType, -}; -use crate::error::Result; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int8DictionaryArray, Int8Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type Int8DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int16DictionaryArray, Int16Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int16Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type Int16DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. 
+/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int32DictionaryArray, Int32Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int32Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type Int32DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int64DictionaryArray, Int64Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int64Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type Int64DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt8DictionaryArray, UInt8Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt8Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type UInt8DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. 
+/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt16DictionaryArray, UInt16Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt16Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type UInt16DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt32DictionaryArray, UInt32Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt32Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type UInt32DictionaryArray = DictionaryArray; +/// +/// A dictionary array where each element is a single value indexed by an integer key. +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt64DictionaryArray, UInt64Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt64Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +pub type UInt64DictionaryArray = DictionaryArray; /// A dictionary array where each element is a single value indexed by an integer key. 
/// This is mostly used to represent strings or a limited set of primitive types as integers, @@ -65,8 +175,7 @@ use crate::error::Result; /// Example **with nullable** data: /// /// ``` -/// use arrow::array::{DictionaryArray, Int8Array}; -/// use arrow::datatypes::Int8Type; +/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)])); @@ -75,8 +184,7 @@ use crate::error::Result; /// Example **without nullable** data: /// /// ``` -/// use arrow::array::{DictionaryArray, Int8Array}; -/// use arrow::datatypes::Int8Type; +/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.into_iter().collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); @@ -85,8 +193,7 @@ use crate::error::Result; /// Example from existing arrays: /// /// ``` -/// use arrow::array::{DictionaryArray, Int8Array, StringArray}; -/// use arrow::datatypes::Int8Type; +/// use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type}; /// // You can form your own DictionaryArray by providing the /// // values (dictionary) and keys (indexes into the dictionary): /// let values = StringArray::from_iter_values(["a", "b", "c"]); @@ -120,7 +227,10 @@ impl DictionaryArray { /// (indexes into the dictionary) and values (dictionary) /// array. Returns an error if there are any keys that are outside /// of the dictionary array. 
- pub fn try_new(keys: &PrimitiveArray, values: &dyn Array) -> Result { + pub fn try_new( + keys: &PrimitiveArray, + values: &dyn Array, + ) -> Result { let dict_data_type = DataType::Dictionary( Box::new(keys.data_type().clone()), Box::new(values.data_type().clone()), @@ -152,28 +262,6 @@ impl DictionaryArray { Ok(array.into()) } - /// Create a new DictionaryArray directly from specified keys - /// (indexes into the dictionary) and values (dictionary) - /// array, and the corresponding ArrayData. This is used internally - /// for the usage like filter kernel. - /// - /// # Safety - /// - /// The input keys, values and data must form a valid DictionaryArray, - /// or undefined behavior can occur. - pub(crate) unsafe fn try_new_unchecked( - keys: PrimitiveArray, - values: ArrayRef, - data: ArrayData, - ) -> Self { - Self { - data, - keys, - values, - is_ordered: false, - } - } - /// Return an array view of the keys of this dictionary as a PrimitiveArray. pub fn keys(&self) -> &PrimitiveArray { &self.keys @@ -239,8 +327,7 @@ impl DictionaryArray { /// Downcast this dictionary to a [`TypedDictionaryArray`] /// /// ``` - /// use arrow::array::{Array, ArrayAccessor, DictionaryArray, StringArray}; - /// use arrow::datatypes::Int32Type; + /// use arrow_array::{Array, ArrayAccessor, DictionaryArray, StringArray, types::Int32Type}; /// /// let orig = [Some("a"), Some("b"), None]; /// let dictionary = DictionaryArray::::from_iter(orig); @@ -312,8 +399,7 @@ impl From> for ArrayData { /// /// # Example: /// ``` -/// use arrow::array::{DictionaryArray, PrimitiveArray, StringArray}; -/// use arrow::datatypes::Int8Type; +/// use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type}; /// /// let test = vec!["a", "a", "b", "c"]; /// let array: DictionaryArray = test @@ -351,8 +437,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator> for Dictionary /// # Example: /// /// ``` -/// use arrow::array::{DictionaryArray, PrimitiveArray, StringArray}; -/// use 
arrow::datatypes::Int8Type; +/// use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type}; /// /// let test = vec!["a", "a", "b", "c"]; /// let array: DictionaryArray = test.into_iter().collect(); @@ -390,8 +475,8 @@ impl Array for DictionaryArray { } } -impl fmt::Debug for DictionaryArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for DictionaryArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { writeln!( f, "DictionaryArray {{keys: {:?} values: {:?}}}", @@ -404,8 +489,7 @@ impl fmt::Debug for DictionaryArray { /// allowing fast access to its elements /// /// ``` -/// use arrow::array::{ArrayIter, DictionaryArray, StringArray}; -/// use arrow::datatypes::Int32Type; +/// use arrow_array::{DictionaryArray, StringArray, types::Int32Type}; /// /// let orig = ["a", "b", "a", "b"]; /// let dictionary = DictionaryArray::::from_iter(orig); @@ -436,8 +520,8 @@ impl<'a, K: ArrowPrimitiveType, V> Clone for TypedDictionaryArray<'a, K, V> { impl<'a, K: ArrowPrimitiveType, V> Copy for TypedDictionaryArray<'a, K, V> {} -impl<'a, K: ArrowPrimitiveType, V> fmt::Debug for TypedDictionaryArray<'a, K, V> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl<'a, K: ArrowPrimitiveType, V> std::fmt::Debug for TypedDictionaryArray<'a, K, V> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { writeln!(f, "TypedDictionaryArray({:?})", self.dictionary) } } @@ -516,18 +600,12 @@ where #[cfg(test)] mod tests { use super::*; - - use crate::array::{Float32Array, Int8Array}; - use crate::datatypes::{Float32Type, Int16Type}; - use crate::{ - array::Int16DictionaryArray, array::PrimitiveDictionaryBuilder, - datatypes::DataType, - }; - use crate::{ - array::{Int16Array, Int32Array}, - datatypes::{Int32Type, Int8Type, UInt32Type, UInt8Type}, + use crate::builder::PrimitiveDictionaryBuilder; + use crate::types::{ + Float32Type, Int16Type, Int32Type, Int8Type, UInt32Type, UInt8Type, }; - 
use crate::{buffer::Buffer, datatypes::ToByteSlice}; + use crate::{Float32Array, Int16Array, Int32Array, Int8Array}; + use arrow_buffer::{Buffer, ToByteSlice}; #[test] fn test_dictionary_array() { diff --git a/arrow/src/array/array_fixed_size_binary.rs b/arrow-array/src/array/fixed_size_binary_array.rs similarity index 95% rename from arrow/src/array/array_fixed_size_binary.rs rename to arrow-array/src/array/fixed_size_binary_array.rs index 22eac1435a8..f37d1e3e5c3 100644 --- a/arrow/src/array/array_fixed_size_binary.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -15,18 +15,13 @@ // specific language governing permissions and limitations // under the License. +use crate::iterator::FixedSizeBinaryIter; +use crate::raw_pointer::RawPtrBox; +use crate::{print_long_array, Array, ArrayAccessor, FixedSizeListArray}; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; -use std::convert::From; -use std::fmt; - -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray, -}; -use crate::array::{ArrayAccessor, FixedSizeBinaryIter}; -use crate::buffer::Buffer; -use crate::error::{ArrowError, Result}; -use crate::util::bit_util; -use crate::{buffer::MutableBuffer, datatypes::DataType}; /// An array where each element is a fixed-size sequence of bytes. /// @@ -35,7 +30,7 @@ use crate::{buffer::MutableBuffer, datatypes::DataType}; /// Create an array from an iterable argument of byte slices. /// /// ``` -/// use arrow::array::{Array, FixedSizeBinaryArray}; +/// use arrow_array::{Array, FixedSizeBinaryArray}; /// let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ]; /// let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); /// @@ -45,7 +40,7 @@ use crate::{buffer::MutableBuffer, datatypes::DataType}; /// Create an array from an iterable argument of sparse byte slices. 
/// Sparsity means that the input argument can contain `None` items. /// ``` -/// use arrow::array::{Array, FixedSizeBinaryArray}; +/// use arrow_array::{Array, FixedSizeBinaryArray}; /// let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ]; /// let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); /// assert_eq!(5, arr.len()) @@ -119,7 +114,7 @@ impl FixedSizeBinaryArray { /// # Examples /// /// ``` - /// use arrow::array::FixedSizeBinaryArray; + /// use arrow_array::FixedSizeBinaryArray; /// let input_arg = vec![ /// None, /// Some(vec![7, 8]), @@ -134,7 +129,7 @@ impl FixedSizeBinaryArray { /// # Errors /// /// Returns error if argument has length zero, or sizes of nested slices don't match. - pub fn try_from_sparse_iter(mut iter: T) -> Result + pub fn try_from_sparse_iter(mut iter: T) -> Result where T: Iterator>, U: AsRef<[u8]>, @@ -145,7 +140,7 @@ impl FixedSizeBinaryArray { let mut null_buf = MutableBuffer::from_len_zeroed(0); let mut buffer = MutableBuffer::from_len_zeroed(0); let mut prepend = 0; - iter.try_for_each(|item| -> Result<()> { + iter.try_for_each(|item| -> Result<(), ArrowError> { // extend null bitmask by one byte per each 8 items if byte == 0 { null_buf.push(0u8); @@ -206,7 +201,7 @@ impl FixedSizeBinaryArray { /// # Examples /// /// ``` - /// use arrow::array::FixedSizeBinaryArray; + /// use arrow_array::FixedSizeBinaryArray; /// let input_arg = vec![ /// vec![1, 2], /// vec![3, 4], @@ -218,7 +213,7 @@ impl FixedSizeBinaryArray { /// # Errors /// /// Returns error if argument has length zero, or sizes of nested slices don't match. 
- pub fn try_from_iter(mut iter: T) -> Result + pub fn try_from_iter(mut iter: T) -> Result where T: Iterator, U: AsRef<[u8]>, @@ -226,7 +221,7 @@ impl FixedSizeBinaryArray { let mut len = 0; let mut size = None; let mut buffer = MutableBuffer::from_len_zeroed(0); - iter.try_for_each(|item| -> Result<()> { + iter.try_for_each(|item| -> Result<(), ArrowError> { let slice = item.as_ref(); if let Some(size) = size { if size != slice.len() { @@ -348,11 +343,11 @@ impl From> for FixedSizeBinaryArray { } } -impl fmt::Debug for FixedSizeBinaryArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for FixedSizeBinaryArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -395,13 +390,10 @@ impl<'a> IntoIterator for &'a FixedSizeBinaryArray { #[cfg(test)] mod tests { + use crate::RecordBatch; + use arrow_schema::{Field, Schema}; use std::sync::Arc; - use crate::{ - datatypes::{Field, Schema}, - record_batch::RecordBatch, - }; - use super::*; #[test] diff --git a/arrow/src/array/array_fixed_size_list.rs b/arrow-array/src/array/fixed_size_list_array.rs similarity index 95% rename from arrow/src/array/array_fixed_size_list.rs rename to arrow-array/src/array/fixed_size_list_array.rs index fc568d54a83..a10c1d28dab 100644 --- a/arrow/src/array/array_fixed_size_list.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -15,12 +15,10 @@ // specific language governing permissions and limitations // under the License. 
+use crate::{make_array, print_long_array, Array, ArrayAccessor, ArrayRef}; +use arrow_data::ArrayData; +use arrow_schema::DataType; use std::any::Any; -use std::fmt; - -use super::{array::print_long_array, make_array, Array, ArrayData, ArrayRef}; -use crate::array::array::ArrayAccessor; -use crate::datatypes::DataType; /// A list array where each element is a fixed-size sequence of values with the same /// type whose maximum length is represented by a i32. @@ -28,9 +26,10 @@ use crate::datatypes::DataType; /// # Example /// /// ``` -/// # use arrow::array::{Array, ArrayData, FixedSizeListArray, Int32Array}; -/// # use arrow::datatypes::{DataType, Field}; -/// # use arrow::buffer::Buffer; +/// # use arrow_array::{Array, FixedSizeListArray, Int32Array}; +/// # use arrow_data::ArrayData; +/// # use arrow_schema::{DataType, Field}; +/// # use arrow_buffer::Buffer; /// // Construct a value array /// let value_data = ArrayData::builder(DataType::Int32) /// .len(9) @@ -174,11 +173,11 @@ impl ArrayAccessor for FixedSizeListArray { } } -impl fmt::Debug for FixedSizeListArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for FixedSizeListArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "FixedSizeListArray<{}>\n[\n", self.value_length())?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -186,12 +185,10 @@ impl fmt::Debug for FixedSizeListArray { #[cfg(test)] mod tests { - use crate::{ - array::ArrayData, array::Int32Array, buffer::Buffer, datatypes::Field, - util::bit_util, - }; - use super::*; + use crate::Int32Array; + use arrow_buffer::{bit_util, Buffer}; + use arrow_schema::Field; #[test] fn test_fixed_size_list_array() { diff --git a/arrow/src/array/array_list.rs b/arrow-array/src/array/list_array.rs similarity index 96% rename from arrow/src/array/array_list.rs rename to 
arrow-array/src/array/list_array.rs index e830acdc2b9..83b0c6d5bd4 100644 --- a/arrow/src/array/array_list.rs +++ b/arrow-array/src/array/list_array.rs @@ -15,21 +15,17 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::fmt; - -use num::Integer; - -use super::{ - array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayData, - ArrayRef, BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray, -}; -use crate::array::array::ArrayAccessor; +use crate::array::make_array; use crate::{ - buffer::MutableBuffer, - datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType, Field}, - error::ArrowError, + builder::BooleanBufferBuilder, iterator::GenericListArrayIter, print_long_array, + raw_pointer::RawPtrBox, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, + PrimitiveArray, }; +use arrow_buffer::{ArrowNativeType, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field}; +use num::Integer; +use std::any::Any; /// trait declaring an offset size, relevant for i32 vs i64 array types. 
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer { @@ -137,8 +133,9 @@ impl GenericListArray { /// Creates a [`GenericListArray`] from an iterator of primitive values /// # Example /// ``` - /// # use arrow::array::ListArray; - /// # use arrow::datatypes::Int32Type; + /// # use arrow_array::ListArray; + /// # use arrow_array::types::Int32Type; + /// /// let data = vec![ /// Some(vec![Some(0), Some(1), Some(2)]), /// None, @@ -281,13 +278,13 @@ impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray fmt::Debug for GenericListArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for GenericListArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let prefix = OffsetSize::PREFIX; write!(f, "{}ListArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -299,8 +296,8 @@ impl fmt::Debug for GenericListArray { /// # Example /// /// ``` -/// # use arrow::array::{Array, ListArray, Int32Array}; -/// # use arrow::datatypes::{DataType, Int32Type}; +/// # use arrow_array::{Array, ListArray, Int32Array, types::Int32Type}; +/// # use arrow_schema::DataType; /// let data = vec![ /// Some(vec![]), /// None, @@ -326,8 +323,8 @@ pub type ListArray = GenericListArray; /// # Example /// /// ``` -/// # use arrow::array::{Array, LargeListArray, Int32Array}; -/// # use arrow::datatypes::{DataType, Int32Type}; +/// # use arrow_array::{Array, LargeListArray, Int32Array, types::Int32Type}; +/// # use arrow_schema::DataType; /// let data = vec![ /// Some(vec![]), /// None, @@ -350,17 +347,10 @@ pub type LargeListArray = GenericListArray; #[cfg(test)] mod tests { - use crate::{ - alloc, - array::ArrayData, - array::Int32Array, - buffer::Buffer, - datatypes::Field, - datatypes::{Int32Type, ToByteSlice}, - util::bit_util, - }; - use super::*; + use 
crate::types::Int32Type; + use crate::Int32Array; + use arrow_buffer::{bit_util, Buffer, ToByteSlice}; fn create_from_buffers() -> ListArray { // Construct a value array @@ -844,7 +834,7 @@ mod tests { #[test] #[should_panic(expected = "memory is not aligned")] fn test_primitive_array_alignment() { - let ptr = alloc::allocate_aligned(8); + let ptr = arrow_buffer::alloc::allocate_aligned(8); let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32) @@ -860,7 +850,7 @@ mod tests { // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] fn test_list_array_alignment() { - let ptr = alloc::allocate_aligned(8); + let ptr = arrow_buffer::alloc::allocate_aligned(8); let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); diff --git a/arrow/src/array/array_map.rs b/arrow-array/src/array/map_array.rs similarity index 96% rename from arrow/src/array/array_map.rs rename to arrow-array/src/array/map_array.rs index 471d56c9c60..bfe8d407274 100644 --- a/arrow/src/array/array_map.rs +++ b/arrow-array/src/array/map_array.rs @@ -15,20 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::array::{StringArray, StructArray}; -use crate::buffer::Buffer; +use crate::raw_pointer::RawPtrBox; +use crate::{make_array, print_long_array, Array, ArrayRef, StringArray, StructArray}; +use arrow_buffer::{ArrowNativeType, Buffer, ToByteSlice}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; -use std::fmt; -use std::mem; use std::sync::Arc; -use super::make_array; -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayRef, -}; -use crate::datatypes::{ArrowNativeType, DataType, Field, ToByteSlice}; -use crate::error::ArrowError; - /// A nested array type where each record is a key-value map. 
/// Keys should always be non-null, but values can be null. /// @@ -221,15 +215,15 @@ impl Array for MapArray { /// Returns the total number of bytes of memory occupied physically by this [MapArray]. fn get_array_memory_size(&self) -> usize { - self.data.get_array_memory_size() + mem::size_of_val(self) + self.data.get_array_memory_size() + std::mem::size_of_val(self) } } -impl fmt::Debug for MapArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for MapArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "MapArray\n[\n")?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -237,16 +231,9 @@ impl fmt::Debug for MapArray { #[cfg(test)] mod tests { + use crate::{Int32Array, UInt32Array}; use std::sync::Arc; - use crate::{ - array::ArrayData, - array::{Int32Array, StructArray, UInt32Array}, - buffer::Buffer, - datatypes::Field, - datatypes::ToByteSlice, - }; - use super::*; fn create_from_buffers() -> MapArray { diff --git a/arrow/src/array/array.rs b/arrow-array/src/array/mod.rs similarity index 94% rename from arrow/src/array/array.rs rename to arrow-array/src/array/mod.rs index 2c2969c925d..d9d2807e36c 100644 --- a/arrow/src/array/array.rs +++ b/arrow-array/src/array/mod.rs @@ -15,42 +15,77 @@ // specific language governing permissions and limitations // under the License. 
+mod binary_array; + +use crate::types::*; +use arrow_buffer::{Buffer, MutableBuffer, ToByteSlice}; +use arrow_data::ArrayData; +use arrow_schema::{DataType, IntervalUnit, TimeUnit}; use std::any::Any; -use std::convert::From; -use std::fmt; use std::sync::Arc; -use super::*; -use crate::buffer::{Buffer, MutableBuffer}; +pub use binary_array::*; + +mod boolean_array; +pub use boolean_array::*; + +mod decimal_array; +pub use decimal_array::*; + +mod dictionary_array; +pub use dictionary_array::*; + +mod fixed_size_binary_array; +pub use fixed_size_binary_array::*; + +mod fixed_size_list_array; +pub use fixed_size_list_array::*; + +mod list_array; +pub use list_array::*; + +mod map_array; +pub use map_array::*; + +mod null_array; +pub use null_array::*; + +mod primitive_array; +pub use primitive_array::*; + +mod string_array; +pub use string_array::*; + +mod struct_array; +pub use struct_array::*; + +mod union_array; +pub use union_array::*; /// Trait for dealing with different types of array at runtime when the type of the /// array is not known in advance. -pub trait Array: fmt::Debug + Send + Sync { +pub trait Array: std::fmt::Debug + Send + Sync { /// Returns the array as [`Any`](std::any::Any) so that it can be /// downcasted to a specific implementation. 
/// /// # Example: /// /// ``` - /// use std::sync::Arc; - /// use arrow::array::Int32Array; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{Int32Array, RecordBatch}; + /// # use arrow_schema::{Schema, Field, DataType, ArrowError}; /// - /// # fn main() -> arrow::error::Result<()> { /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let batch = RecordBatch::try_new( /// Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])), /// vec![Arc::new(id)] - /// )?; + /// ).unwrap(); /// /// let int32array = batch /// .column(0) /// .as_any() /// .downcast_ref::() /// .expect("Failed to downcast"); - /// # Ok(()) - /// # } /// ``` fn as_any(&self) -> &dyn Any; @@ -65,13 +100,13 @@ pub trait Array: fmt::Debug + Send + Sync { self.data() } - /// Returns a reference to the [`DataType`](crate::datatypes::DataType) of this array. + /// Returns a reference to the [`DataType`](arrow_schema::DataType) of this array. 
/// /// # Example: /// /// ``` - /// use arrow::datatypes::DataType; - /// use arrow::array::{Array, Int32Array}; + /// use arrow_schema::DataType; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// @@ -86,7 +121,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// // Make slice over the values [2, 3, 4] @@ -103,7 +138,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// @@ -118,7 +153,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// @@ -135,7 +170,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// // Make slice over the values [2, 3, 4] @@ -154,7 +189,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![Some(1), None]); /// @@ -171,7 +206,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// let array = Int32Array::from(vec![Some(1), None]); /// @@ -187,7 +222,7 @@ pub trait Array: fmt::Debug + Send + Sync { /// # Example: /// /// ``` - /// use arrow::array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array}; /// /// // Construct an 
array with values [1, NULL, NULL] /// let array = Int32Array::from(vec![Some(1), None, None]); @@ -558,8 +593,8 @@ pub fn make_array(data: ArrayData) -> ArrayRef { /// /// ``` /// use std::sync::Arc; -/// use arrow::datatypes::DataType; -/// use arrow::array::{ArrayRef, Int32Array, new_empty_array}; +/// use arrow_schema::DataType; +/// use arrow_array::{ArrayRef, Int32Array, new_empty_array}; /// /// let empty_array = new_empty_array(&DataType::Int32); /// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec)); @@ -576,8 +611,8 @@ pub fn new_empty_array(data_type: &DataType) -> ArrayRef { /// /// ``` /// use std::sync::Arc; -/// use arrow::datatypes::DataType; -/// use arrow::array::{ArrayRef, Int32Array, new_null_array}; +/// use arrow_schema::DataType; +/// use arrow_array::{ArrayRef, Int32Array, new_null_array}; /// /// let null_array = new_null_array(&DataType::Int32, 3); /// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None])); @@ -788,14 +823,14 @@ fn new_null_sized_decimal( } // Helper function for printing potentially long arrays. -pub(super) fn print_long_array( +pub(crate) fn print_long_array( array: &A, - f: &mut fmt::Formatter, + f: &mut std::fmt::Formatter, print_item: F, -) -> fmt::Result +) -> std::fmt::Result where A: Array, - F: Fn(&A, usize, &mut fmt::Formatter) -> fmt::Result, + F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result, { let head = std::cmp::min(10, array.len()); @@ -831,6 +866,7 @@ where #[cfg(test)] mod tests { use super::*; + use arrow_schema::Field; #[test] fn test_empty_primitive() { diff --git a/arrow/src/array/null.rs b/arrow-array/src/array/null_array.rs similarity index 93% rename from arrow/src/array/null.rs rename to arrow-array/src/array/null_array.rs index 467121f6ccf..d796324f663 100644 --- a/arrow/src/array/null.rs +++ b/arrow-array/src/array/null_array.rs @@ -17,11 +17,10 @@ //! Contains the `NullArray` type. 
+use crate::Array; +use arrow_data::ArrayData; +use arrow_schema::DataType; use std::any::Any; -use std::fmt; - -use crate::array::{Array, ArrayData}; -use crate::datatypes::*; /// An Array where all elements are nulls /// @@ -30,16 +29,12 @@ use crate::datatypes::*; /// # Example: Create an array /// /// ``` -/// use arrow::array::{Array, NullArray}; +/// use arrow_array::{Array, NullArray}; /// -/// # fn main() -> arrow::error::Result<()> { /// let array = NullArray::new(10); /// /// assert_eq!(array.len(), 10); /// assert_eq!(array.null_count(), 10); -/// -/// # Ok(()) -/// # } /// ``` pub struct NullArray { data: ArrayData, @@ -116,8 +111,8 @@ impl From for ArrayData { } } -impl fmt::Debug for NullArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for NullArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "NullArray({})", self.len()) } } diff --git a/arrow/src/array/array_primitive.rs b/arrow-array/src/array/primitive_array.rs similarity index 81% rename from arrow/src/array/array_primitive.rs rename to arrow-array/src/array/primitive_array.rs index 57168b7b9e6..f9e4e7675da 100644 --- a/arrow/src/array/array_primitive.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -15,34 +15,194 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; -use std::convert::From; -use std::fmt; -use std::iter::{FromIterator, IntoIterator}; -use std::mem; - -use chrono::{prelude::*, Duration}; - -use super::array::print_long_array; -use super::raw_pointer::RawPtrBox; -use super::*; -use crate::temporal_conversions; -use crate::util::bit_util; -use crate::{ - buffer::{Buffer, MutableBuffer}, - util::trusted_len_unzip, -}; - -use crate::array::array::ArrayAccessor; +use crate::builder::{BooleanBufferBuilder, PrimitiveBuilder}; +use crate::iterator::PrimitiveIter; +use crate::raw_pointer::RawPtrBox; +use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time}; +use crate::trusted_len::trusted_len_unzip; +use crate::types::*; +use crate::{print_long_array, Array, ArrayAccessor}; +use arrow_buffer::{bit_util, ArrowNativeType, Buffer, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::DataType; +use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime}; use half::f16; +use std::any::Any; + +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Int8Array; +/// let arr : Int8Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type Int8Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Int16Array; +/// let arr : Int16Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type Int16Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Int32Array; +/// let arr : Int32Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type Int32Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Int64Array; +/// let arr : Int64Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type Int64Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::UInt8Array; +/// let arr : UInt8Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type UInt8Array = 
PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::UInt16Array; +/// let arr : UInt16Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type UInt16Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::UInt32Array; +/// let arr : UInt32Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type UInt32Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::UInt64Array; +/// let arr : UInt64Array = [Some(1), Some(2)].into_iter().collect(); +/// ``` +pub type UInt64Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Float16Array; +/// use half::f16; +/// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); +/// ``` +pub type Float16Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Float32Array; +/// let arr : Float32Array = [Some(1.0), Some(2.0)].into_iter().collect(); +/// ``` +pub type Float32Array = PrimitiveArray; +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Float64Array; +/// let arr : Float64Array = [Some(1.0), Some(2.0)].into_iter().collect(); +/// ``` +pub type Float64Array = PrimitiveArray; + +/// +/// A primitive array where each element is of type [TimestampSecondType]. +/// See also [`Timestamp`](arrow_schema::DataType::Timestamp). 
+/// +/// # Example: UTC timestamps post epoch +/// ``` +/// # use arrow_array::TimestampSecondArray; +/// use chrono::FixedOffset; +/// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00 +/// let arr = TimestampSecondArray::from_vec(vec![11111111], None); +/// // OR +/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(11111111)], None); +/// let utc_offset = FixedOffset::east(0); +/// +/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11") +/// ``` +/// +/// # Example: UTC timestamps pre epoch +/// ``` +/// # use arrow_array::TimestampSecondArray; +/// use chrono::FixedOffset; +/// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00 +/// let arr = TimestampSecondArray::from_vec(vec![-11111111], None); +/// // OR +/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(-11111111)], None); +/// let utc_offset = FixedOffset::east(0); +/// +/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49") +/// ``` +/// +/// # Example: With timezone specified +/// ``` +/// # use arrow_array::TimestampSecondArray; +/// use chrono::FixedOffset; +/// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00 +/// let arr = TimestampSecondArray::from_vec(vec![11111111], Some("+10:00".to_string())); +/// // OR +/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(11111111)], Some("+10:00".to_string())); +/// let sydney_offset = FixedOffset::east(10 * 60 * 60); +/// +/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11") +/// ``` +/// +pub type TimestampSecondArray = PrimitiveArray; +/// A primitive array where each element is of type `TimestampMillisecondType.` +/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) +pub type TimestampMillisecondArray = PrimitiveArray; +/// A primitive array 
where each element is of type `TimestampMicrosecondType.` +/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) +pub type TimestampMicrosecondArray = PrimitiveArray; +/// A primitive array where each element is of type `TimestampNanosecondType.` +/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) +pub type TimestampNanosecondArray = PrimitiveArray; +pub type Date32Array = PrimitiveArray; +pub type Date64Array = PrimitiveArray; +pub type Time32SecondArray = PrimitiveArray; +pub type Time32MillisecondArray = PrimitiveArray; +pub type Time64MicrosecondArray = PrimitiveArray; +pub type Time64NanosecondArray = PrimitiveArray; +pub type IntervalYearMonthArray = PrimitiveArray; +pub type IntervalDayTimeArray = PrimitiveArray; +pub type IntervalMonthDayNanoArray = PrimitiveArray; +pub type DurationSecondArray = PrimitiveArray; +pub type DurationMillisecondArray = PrimitiveArray; +pub type DurationMicrosecondArray = PrimitiveArray; +pub type DurationNanosecondArray = PrimitiveArray; + +/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the +/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`]. +pub trait ArrowPrimitiveType: 'static { + /// Corresponding Rust native type for the primitive type. + type Native: ArrowNativeType; + + /// the corresponding Arrow data type of this primitive type. + const DATA_TYPE: DataType; + + /// Returns the byte width of this primitive type. + fn get_byte_width() -> usize { + std::mem::size_of::() + } + + /// Returns a default value of this primitive type. + /// + /// This is useful for aggregate array ops like `sum()`, `mean()`. + fn default_value() -> Self::Native { + Default::default() + } +} /// Array whose elements are of primitive types. 
/// /// # Example: From an iterator of values /// /// ``` -/// use arrow::array::{Array, PrimitiveArray}; -/// use arrow::datatypes::Int32Type; +/// use arrow_array::{Array, PrimitiveArray, types::Int32Type}; /// let arr: PrimitiveArray = PrimitiveArray::from_iter_values((0..10).map(|x| x + 1)); /// assert_eq!(10, arr.len()); /// assert_eq!(0, arr.null_count()); @@ -125,7 +285,7 @@ impl PrimitiveArray { let data = unsafe { ArrayData::new_unchecked( T::DATA_TYPE, - val_buf.len() / mem::size_of::<::Native>(), + val_buf.len() / std::mem::size_of::<::Native>(), None, None, 0, @@ -143,7 +303,7 @@ impl PrimitiveArray { let data = unsafe { ArrayData::new_unchecked( T::DATA_TYPE, - val_buf.len() / mem::size_of::<::Native>(), + val_buf.len() / std::mem::size_of::<::Native>(), None, None, 0, @@ -206,79 +366,9 @@ impl<'a, T: ArrowPrimitiveType> ArrayAccessor for &'a PrimitiveArray { } } -pub(crate) fn as_datetime(v: i64) -> Option { - match T::DATA_TYPE { - DataType::Date32 => Some(temporal_conversions::date32_to_datetime(v as i32)), - DataType::Date64 => Some(temporal_conversions::date64_to_datetime(v)), - DataType::Time32(_) | DataType::Time64(_) => None, - DataType::Timestamp(unit, _) => match unit { - TimeUnit::Second => Some(temporal_conversions::timestamp_s_to_datetime(v)), - TimeUnit::Millisecond => { - Some(temporal_conversions::timestamp_ms_to_datetime(v)) - } - TimeUnit::Microsecond => { - Some(temporal_conversions::timestamp_us_to_datetime(v)) - } - TimeUnit::Nanosecond => { - Some(temporal_conversions::timestamp_ns_to_datetime(v)) - } - }, - // interval is not yet fully documented [ARROW-3097] - DataType::Interval(_) => None, - _ => None, - } -} - -fn as_date(v: i64) -> Option { - as_datetime::(v).map(|datetime| datetime.date()) -} - -pub(crate) fn as_time(v: i64) -> Option { - match T::DATA_TYPE { - DataType::Time32(unit) => { - // safe to immediately cast to u32 as `self.value(i)` is positive i32 - let v = v as u32; - match unit { - TimeUnit::Second => 
Some(temporal_conversions::time32s_to_time(v as i32)), - TimeUnit::Millisecond => { - Some(temporal_conversions::time32ms_to_time(v as i32)) - } - _ => None, - } - } - DataType::Time64(unit) => match unit { - TimeUnit::Microsecond => Some(temporal_conversions::time64us_to_time(v)), - TimeUnit::Nanosecond => Some(temporal_conversions::time64ns_to_time(v)), - _ => None, - }, - DataType::Timestamp(_, _) => as_datetime::(v).map(|datetime| datetime.time()), - DataType::Date32 | DataType::Date64 => Some(NaiveTime::from_hms(0, 0, 0)), - DataType::Interval(_) => None, - _ => None, - } -} - -fn as_duration(v: i64) -> Option { - match T::DATA_TYPE { - DataType::Duration(unit) => match unit { - TimeUnit::Second => Some(temporal_conversions::duration_s_to_duration(v)), - TimeUnit::Millisecond => { - Some(temporal_conversions::duration_ms_to_duration(v)) - } - TimeUnit::Microsecond => { - Some(temporal_conversions::duration_us_to_duration(v)) - } - TimeUnit::Nanosecond => { - Some(temporal_conversions::duration_ns_to_duration(v)) - } - }, - _ => None, - } -} - -impl PrimitiveArray +impl PrimitiveArray where - i64: std::convert::From, + i64: From, { /// Returns value as a chrono `NaiveDateTime`, handling time resolution /// @@ -322,8 +412,8 @@ where } } -impl fmt::Debug for PrimitiveArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for PrimitiveArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?; print_long_array(self, f, |array, index, f| match T::DATA_TYPE { DataType::Date32 | DataType::Date64 => { @@ -347,7 +437,7 @@ impl fmt::Debug for PrimitiveArray { None => write!(f, "null"), } } - _ => fmt::Debug::fmt(&array.value(index), f), + _ => std::fmt::Debug::fmt(&array.value(index), f), })?; write!(f, "]") } @@ -574,7 +664,7 @@ impl PrimitiveArray { // TODO: duplicated from def_numeric_from_vec! 
macro, it looks possible to convert to generic let data_len = data.len(); let mut null_buf = MutableBuffer::new_null(data_len); - let mut val_buf = MutableBuffer::new(data_len * mem::size_of::()); + let mut val_buf = MutableBuffer::new(data_len * std::mem::size_of::()); { let null_slice = null_buf.as_slice_mut(); @@ -618,12 +708,7 @@ impl From for PrimitiveArray { #[cfg(test)] mod tests { use super::*; - - use std::thread; - - use crate::buffer::Buffer; - use crate::compute::eq_dyn; - use crate::datatypes::DataType; + use crate::BooleanArray; #[test] fn test_primitive_array_from_vec() { @@ -1099,7 +1184,7 @@ mod tests { #[test] fn test_access_array_concurrently() { let a = Int32Array::from(vec![5, 6, 7, 8, 9]); - let ret = thread::spawn(move || a.value(3)).join(); + let ret = std::thread::spawn(move || a.value(3)).join(); assert!(ret.is_ok()); assert_eq!(8, ret.ok().unwrap()); @@ -1110,11 +1195,7 @@ mod tests { let array1: Int8Array = [10_i8, 11, 12, 13, 14].into_iter().collect(); let array2: Int8Array = [10_i8, 11, 12, 13, 14].into_iter().map(Some).collect(); - let result = eq_dyn(&array1, &array2); - assert_eq!( - result.unwrap(), - BooleanArray::from(vec![true, true, true, true, true]) - ); + assert_eq!(array1, array2); } #[cfg(feature = "chrono-tz")] diff --git a/arrow/src/array/array_string.rs b/arrow-array/src/array/string_array.rs similarity index 97% rename from arrow/src/array/array_string.rs rename to arrow-array/src/array/string_array.rs index f3ecaa2d559..6e34f125bb4 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow-array/src/array/string_array.rs @@ -15,18 +15,16 @@ // specific language governing permissions and limitations // under the License. 
-use std::convert::From; -use std::fmt; -use std::{any::Any, iter::FromIterator}; - -use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, - GenericBinaryArray, GenericListArray, GenericStringIter, OffsetSizeTrait, +use crate::iterator::GenericStringIter; +use crate::raw_pointer::RawPtrBox; +use crate::{ + print_long_array, Array, ArrayAccessor, GenericBinaryArray, GenericListArray, + OffsetSizeTrait, }; -use crate::array::array::ArrayAccessor; -use crate::buffer::Buffer; -use crate::util::bit_util; -use crate::{buffer::MutableBuffer, datatypes::DataType}; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::ArrayData; +use arrow_schema::DataType; +use std::any::Any; /// Generic struct for \[Large\]StringArray /// @@ -303,13 +301,13 @@ impl<'a, T: OffsetSizeTrait> GenericStringArray { } } -impl fmt::Debug for GenericStringArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for GenericStringArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let prefix = OffsetSize::PREFIX; write!(f, "{}StringArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + std::fmt::Debug::fmt(&array.value(index), f) })?; write!(f, "]") } @@ -414,7 +412,7 @@ impl From> for Array /// Example /// /// ``` -/// use arrow::array::StringArray; +/// use arrow_array::StringArray; /// let array = StringArray::from(vec![Some("foo"), None, Some("bar")]); /// assert_eq!(array.value(0), "foo"); /// ``` @@ -426,7 +424,7 @@ pub type StringArray = GenericStringArray; /// Example /// /// ``` -/// use arrow::array::LargeStringArray; +/// use arrow_array::LargeStringArray; /// let array = LargeStringArray::from(vec![Some("foo"), None, Some("bar")]); /// assert_eq!(array.value(2), "bar"); /// ``` @@ -434,13 +432,9 @@ pub type LargeStringArray = GenericStringArray; #[cfg(test)] mod tests { - - use crate::{ - array::{ListBuilder, StringBuilder}, - 
datatypes::Field, - }; - use super::*; + use crate::builder::{ListBuilder, StringBuilder}; + use arrow_schema::Field; #[test] fn test_string_array_from_u8_slice() { diff --git a/arrow/src/array/array_struct.rs b/arrow-array/src/array/struct_array.rs similarity index 95% rename from arrow/src/array/array_struct.rs rename to arrow-array/src/array/struct_array.rs index a6c3146aef2..841d3235f64 100644 --- a/arrow/src/array/array_struct.rs +++ b/arrow-array/src/array/struct_array.rs @@ -15,18 +15,12 @@ // specific language governing permissions and limitations // under the License. +use crate::{make_array, Array, ArrayRef}; +use arrow_buffer::buffer::buffer_bin_or; +use arrow_buffer::Buffer; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; -use std::convert::{From, TryFrom}; -use std::fmt; -use std::iter::IntoIterator; - -use super::{make_array, Array, ArrayData, ArrayRef}; -use crate::datatypes::DataType; -use crate::error::{ArrowError, Result}; -use crate::{ - buffer::{buffer_bin_or, Buffer}, - datatypes::Field, -}; /// A nested array type where each child (called *field*) is represented by a separate /// array. @@ -34,8 +28,8 @@ use crate::{ /// /// ``` /// use std::sync::Arc; -/// use arrow::array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray}; -/// use arrow::datatypes::{DataType, Field}; +/// use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray}; +/// use arrow_schema::{DataType, Field}; /// /// let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true])); /// let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); @@ -130,7 +124,7 @@ impl TryFrom> for StructArray { /// builds a StructArray from a vector of names and arrays. /// This errors if the values have a different length. /// An entry is set to Null when all values are null. 
- fn try_from(values: Vec<(&str, ArrayRef)>) -> Result { + fn try_from(values: Vec<(&str, ArrayRef)>) -> Result { let values_len = values.len(); // these will be populated @@ -239,8 +233,8 @@ impl From> for StructArray { } } -impl fmt::Debug for StructArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for StructArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "StructArray\n[\n")?; for (child_index, name) in self.column_names().iter().enumerate() { let column = self.column(child_index); @@ -251,7 +245,7 @@ impl fmt::Debug for StructArray { name, column.data_type() )?; - fmt::Debug::fmt(column, f)?; + std::fmt::Debug::fmt(column, f)?; writeln!(f)?; } write!(f, "]") @@ -290,17 +284,12 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray { mod tests { use super::*; - use std::sync::Arc; - use crate::{ - array::BooleanArray, array::Float32Array, array::Float64Array, array::Int32Array, - array::StringArray, bitmap::Bitmap, + BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray, }; - use crate::{ - array::Int64Array, - datatypes::{DataType, Field}, - }; - use crate::{buffer::Buffer, datatypes::ToByteSlice}; + use arrow_buffer::ToByteSlice; + use arrow_data::Bitmap; + use std::sync::Arc; #[test] fn test_struct_array_builder() { diff --git a/arrow/src/array/array_union.rs b/arrow-array/src/array/union_array.rs similarity index 97% rename from arrow/src/array/array_union.rs rename to arrow-array/src/array/union_array.rs index b221239b2db..f62a84cf03c 100644 --- a/arrow/src/array/array_union.rs +++ b/arrow-array/src/array/union_array.rs @@ -15,26 +15,24 @@ // specific language governing permissions and limitations // under the License. +use crate::{make_array, Array, ArrayRef}; +use arrow_buffer::Buffer; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field, UnionMode}; /// Contains the `UnionArray` type. 
/// -use crate::array::{make_array, Array, ArrayData, ArrayRef}; -use crate::buffer::Buffer; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; - -use core::fmt; use std::any::Any; /// An Array that can represent slots of varying types. /// /// Each slot in a [UnionArray] can have a value chosen from a number /// of types. Each of the possible types are named like the fields of -/// a [`StructArray`](crate::array::StructArray). A `UnionArray` can +/// a [`StructArray`](crate::StructArray). A `UnionArray` can /// have two possible memory layouts, "dense" or "sparse". For more /// information on please see the /// [specification](https://arrow.apache.org/docs/format/Columnar.html#union-layout). /// -/// [UnionBuilder](crate::array::UnionBuilder) can be used to +/// [UnionBuilder](crate::builder::UnionBuilder) can be used to /// create [UnionArray]'s of primitive types. `UnionArray`'s of nested /// types are also supported but not via `UnionBuilder`, see the tests /// for examples. 
@@ -42,10 +40,10 @@ use std::any::Any; /// # Examples /// ## Create a dense UnionArray `[1, 3.2, 34]` /// ``` -/// use arrow::buffer::Buffer; -/// use arrow::datatypes::*; +/// use arrow_buffer::Buffer; +/// use arrow_schema::*; /// use std::sync::Arc; -/// use arrow::array::{Array, Int32Array, Float64Array, UnionArray}; +/// use arrow_array::{Array, Int32Array, Float64Array, UnionArray}; /// /// let int_array = Int32Array::from(vec![1, 34]); /// let float_array = Float64Array::from(vec![3.2]); @@ -76,10 +74,10 @@ use std::any::Any; /// /// ## Create a sparse UnionArray `[1, 3.2, 34]` /// ``` -/// use arrow::buffer::Buffer; -/// use arrow::datatypes::*; +/// use arrow_buffer::Buffer; +/// use arrow_schema::*; /// use std::sync::Arc; -/// use arrow::array::{Array, Int32Array, Float64Array, UnionArray}; +/// use arrow_array::{Array, Int32Array, Float64Array, UnionArray}; /// /// let int_array = Int32Array::from(vec![Some(1), None, Some(34)]); /// let float_array = Float64Array::from(vec![None, Some(3.2), None]); @@ -174,7 +172,7 @@ impl UnionArray { type_ids: Buffer, value_offsets: Option, child_arrays: Vec<(Field, ArrayRef)>, - ) -> Result { + ) -> Result { if let Some(b) = &value_offsets { if ((type_ids.len()) * 4) != b.len() { return Err(ArrowError::InvalidArgumentError( @@ -339,8 +337,8 @@ impl Array for UnionArray { } } -impl fmt::Debug for UnionArray { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl std::fmt::Debug for UnionArray { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let header = if self.is_dense() { "UnionArray(Dense)\n[" } else { @@ -365,7 +363,7 @@ impl fmt::Debug for UnionArray { *name, column.data_type() )?; - fmt::Debug::fmt(column, f)?; + std::fmt::Debug::fmt(column, f)?; writeln!(f)?; } writeln!(f, "]") @@ -376,13 +374,13 @@ impl fmt::Debug for UnionArray { mod tests { use super::*; + use crate::builder::UnionBuilder; + use crate::types::{Float32Type, Float64Type, Int32Type, Int64Type}; + use 
crate::RecordBatch; + use crate::{Float64Array, Int32Array, Int64Array, StringArray}; + use arrow_schema::Schema; use std::sync::Arc; - use crate::array::*; - use crate::buffer::Buffer; - use crate::datatypes::{DataType, Field}; - use crate::record_batch::RecordBatch; - #[test] fn test_dense_i32() { let mut builder = UnionBuilder::new_dense(); diff --git a/arrow/src/array/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs similarity index 98% rename from arrow/src/array/builder/boolean_buffer_builder.rs rename to arrow-array/src/builder/boolean_buffer_builder.rs index 5b6d1ce4847..16c6750d1d9 100644 --- a/arrow/src/array/builder/boolean_buffer_builder.rs +++ b/arrow-array/src/builder/boolean_buffer_builder.rs @@ -15,11 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::buffer::{Buffer, MutableBuffer}; - -use super::Range; - -use crate::util::bit_util; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_data::bit_mask; +use std::ops::Range; #[derive(Debug)] pub struct BooleanBufferBuilder { @@ -139,7 +137,7 @@ impl BooleanBufferBuilder { let offset_write = self.len; let len = range.end - range.start; self.advance(len); - crate::util::bit_mask::set_bits( + bit_mask::set_bits( self.buffer.as_slice_mut(), to_set, offset_write, diff --git a/arrow/src/array/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs similarity index 85% rename from arrow/src/array/builder/boolean_builder.rs rename to arrow-array/src/builder/boolean_builder.rs index eed14a55fd9..96711dd1f6f 100644 --- a/arrow/src/array/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -15,21 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, BooleanBufferBuilder}; +use crate::{ArrayRef, BooleanArray}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; -use crate::array::ArrayBuilder; -use crate::array::ArrayData; -use crate::array::ArrayRef; -use crate::array::BooleanArray; -use crate::datatypes::DataType; - -use crate::error::ArrowError; -use crate::error::Result; - -use super::BooleanBufferBuilder; -use super::NullBufferBuilder; - /// Array builder for fixed-width primitive types /// /// # Example @@ -37,28 +30,29 @@ use super::NullBufferBuilder; /// Create a `BooleanArray` from a `BooleanBuilder` /// /// ``` -/// use arrow::array::{Array, BooleanArray, BooleanBuilder}; /// -/// let mut b = BooleanBuilder::new(); -/// b.append_value(true); -/// b.append_null(); -/// b.append_value(false); -/// b.append_value(true); -/// let arr = b.finish(); +/// # use arrow_array::{Array, BooleanArray, builder::BooleanBuilder}; +/// +/// let mut b = BooleanBuilder::new(); +/// b.append_value(true); +/// b.append_null(); +/// b.append_value(false); +/// b.append_value(true); +/// let arr = b.finish(); /// -/// assert_eq!(4, arr.len()); -/// assert_eq!(1, arr.null_count()); -/// assert_eq!(true, arr.value(0)); -/// assert!(arr.is_valid(0)); -/// assert!(!arr.is_null(0)); -/// assert!(!arr.is_valid(1)); -/// assert!(arr.is_null(1)); -/// assert_eq!(false, arr.value(2)); -/// assert!(arr.is_valid(2)); -/// assert!(!arr.is_null(2)); -/// assert_eq!(true, arr.value(3)); -/// assert!(arr.is_valid(3)); -/// assert!(!arr.is_null(3)); +/// assert_eq!(4, arr.len()); +/// assert_eq!(1, arr.null_count()); +/// assert_eq!(true, arr.value(0)); +/// assert!(arr.is_valid(0)); +/// assert!(!arr.is_null(0)); +/// assert!(!arr.is_valid(1)); +/// assert!(arr.is_null(1)); +/// assert_eq!(false, arr.value(2)); +/// assert!(arr.is_valid(2)); +/// assert!(!arr.is_null(2)); +/// 
assert_eq!(true, arr.value(3)); +/// assert!(arr.is_valid(3)); +/// assert!(!arr.is_null(3)); /// ``` #[derive(Debug)] pub struct BooleanBuilder { @@ -132,7 +126,11 @@ impl BooleanBuilder { /// /// Returns an error if the slices are of different lengths #[inline] - pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> { + pub fn append_values( + &mut self, + values: &[bool], + is_valid: &[bool], + ) -> Result<(), ArrowError> { if values.len() != is_valid.len() { Err(ArrowError::InvalidArgumentError( "Value and validity lengths must be equal".to_string(), @@ -193,7 +191,8 @@ impl ArrayBuilder for BooleanBuilder { #[cfg(test)] mod tests { use super::*; - use crate::{array::Array, buffer::Buffer}; + use crate::Array; + use arrow_buffer::Buffer; #[test] fn test_boolean_array_builder() { diff --git a/arrow/src/array/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs similarity index 75% rename from arrow/src/array/builder/buffer_builder.rs rename to arrow-array/src/builder/buffer_builder.rs index a6a81dfd6c0..2da11cb2320 100644 --- a/arrow/src/array/builder/buffer_builder.rs +++ b/arrow-array/src/builder/buffer_builder.rs @@ -15,17 +15,60 @@ // specific language governing permissions and limitations // under the License. -use std::mem; - -use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::ArrowNativeType; - -use super::PhantomData; - -/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. 
+use crate::array::ArrowPrimitiveType; +use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer}; +use std::marker::PhantomData; + +use crate::types::*; + +pub type Int8BufferBuilder = BufferBuilder; +pub type Int16BufferBuilder = BufferBuilder; +pub type Int32BufferBuilder = BufferBuilder; +pub type Int64BufferBuilder = BufferBuilder; +pub type UInt8BufferBuilder = BufferBuilder; +pub type UInt16BufferBuilder = BufferBuilder; +pub type UInt32BufferBuilder = BufferBuilder; +pub type UInt64BufferBuilder = BufferBuilder; +pub type Float32BufferBuilder = BufferBuilder; +pub type Float64BufferBuilder = BufferBuilder; + +pub type TimestampSecondBufferBuilder = + BufferBuilder<::Native>; +pub type TimestampMillisecondBufferBuilder = + BufferBuilder<::Native>; +pub type TimestampMicrosecondBufferBuilder = + BufferBuilder<::Native>; +pub type TimestampNanosecondBufferBuilder = + BufferBuilder<::Native>; +pub type Date32BufferBuilder = BufferBuilder<::Native>; +pub type Date64BufferBuilder = BufferBuilder<::Native>; +pub type Time32SecondBufferBuilder = + BufferBuilder<::Native>; +pub type Time32MillisecondBufferBuilder = + BufferBuilder<::Native>; +pub type Time64MicrosecondBufferBuilder = + BufferBuilder<::Native>; +pub type Time64NanosecondBufferBuilder = + BufferBuilder<::Native>; +pub type IntervalYearMonthBufferBuilder = + BufferBuilder<::Native>; +pub type IntervalDayTimeBufferBuilder = + BufferBuilder<::Native>; +pub type IntervalMonthDayNanoBufferBuilder = + BufferBuilder<::Native>; +pub type DurationSecondBufferBuilder = + BufferBuilder<::Native>; +pub type DurationMillisecondBufferBuilder = + BufferBuilder<::Native>; +pub type DurationMicrosecondBufferBuilder = + BufferBuilder<::Native>; +pub type DurationNanosecondBufferBuilder = + BufferBuilder<::Native>; + +/// Builder for creating a [`Buffer`](arrow_buffer::Buffer) object. /// -/// A [`Buffer`](crate::buffer::Buffer) is the underlying data -/// structure of Arrow's [`Arrays`](crate::array::Array). 
+/// A [`Buffer`](arrow_buffer::Buffer) is the underlying data +/// structure of Arrow's [`Arrays`](crate::Array). /// /// For all supported types, there are type definitions for the /// generic version of `BufferBuilder`, e.g. `UInt8BufferBuilder`. @@ -33,17 +76,14 @@ use super::PhantomData; /// # Example: /// /// ``` -/// use arrow::array::UInt8BufferBuilder; +/// # use arrow_array::builder::UInt8BufferBuilder; /// -/// # fn main() -> arrow::error::Result<()> { /// let mut builder = UInt8BufferBuilder::new(100); /// builder.append_slice(&[42, 43, 44]); /// builder.append(45); /// let buffer = builder.finish(); /// /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 43, 44, 45]); -/// # Ok(()) -/// # } /// ``` #[derive(Debug)] pub struct BufferBuilder { @@ -67,7 +107,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// @@ -75,7 +115,7 @@ impl BufferBuilder { /// ``` #[inline] pub fn new(capacity: usize) -> Self { - let buffer = MutableBuffer::new(capacity * mem::size_of::()); + let buffer = MutableBuffer::new(capacity * std::mem::size_of::()); Self { buffer, @@ -89,7 +129,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append(42); @@ -105,7 +145,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append(42); @@ -136,7 +176,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.advance(2); @@ -145,7 +185,7 @@ impl 
BufferBuilder { /// ``` #[inline] pub fn advance(&mut self, i: usize) { - self.buffer.extend_zeros(i * mem::size_of::()); + self.buffer.extend_zeros(i * std::mem::size_of::()); self.len += i; } @@ -154,7 +194,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.reserve(10); @@ -163,7 +203,7 @@ impl BufferBuilder { /// ``` #[inline] pub fn reserve(&mut self, n: usize) { - self.buffer.reserve(n * mem::size_of::()); + self.buffer.reserve(n * std::mem::size_of::()); } /// Appends a value of type `T` into the builder, @@ -172,7 +212,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append(42); @@ -192,7 +232,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append_n(10, 42); @@ -213,7 +253,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt32BufferBuilder; + /// # use arrow_array::builder::UInt32BufferBuilder; /// /// let mut builder = UInt32BufferBuilder::new(10); /// builder.append_n_zeroed(3); @@ -222,7 +262,7 @@ impl BufferBuilder { /// assert_eq!(builder.as_slice(), &[0, 0, 0]) #[inline] pub fn append_n_zeroed(&mut self, n: usize) { - self.buffer.extend_zeros(n * mem::size_of::()); + self.buffer.extend_zeros(n * std::mem::size_of::()); self.len += n; } @@ -231,7 +271,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append_slice(&[42, 44, 46]); @@ -247,7 +287,7 @@ impl 
BufferBuilder { /// View the contents of this buffer as a slice /// /// ``` - /// use arrow::array::Float64BufferBuilder; + /// # use arrow_array::builder::Float64BufferBuilder; /// /// let mut builder = Float64BufferBuilder::new(10); /// builder.append(1.3); @@ -270,7 +310,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::Float32BufferBuilder; + /// # use arrow_array::builder::Float32BufferBuilder; /// /// let mut builder = Float32BufferBuilder::new(10); /// @@ -297,7 +337,7 @@ impl BufferBuilder { /// # Example: /// /// ``` - /// use arrow::array::UInt16BufferBuilder; + /// # use arrow_array::builder::UInt16BufferBuilder; /// /// let mut builder = UInt16BufferBuilder::new(10); /// @@ -312,7 +352,7 @@ impl BufferBuilder { /// ``` #[inline] pub fn truncate(&mut self, len: usize) { - self.buffer.truncate(len * mem::size_of::()); + self.buffer.truncate(len * std::mem::size_of::()); self.len = len; } @@ -333,12 +373,12 @@ impl BufferBuilder { self.len += len; } - /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer). + /// Resets this builder and returns an immutable [`Buffer`](arrow_buffer::Buffer). 
/// /// # Example: /// /// ``` - /// use arrow::array::UInt8BufferBuilder; + /// # use arrow_array::builder::UInt8BufferBuilder; /// /// let mut builder = UInt8BufferBuilder::new(10); /// builder.append_slice(&[42, 44, 46]); @@ -357,11 +397,10 @@ impl BufferBuilder { #[cfg(test)] mod tests { - use crate::array::array::Array; - use crate::array::builder::ArrayBuilder; - use crate::array::Int32BufferBuilder; - use crate::array::Int8Builder; - use crate::array::UInt8BufferBuilder; + use crate::builder::{ + ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder, + }; + use crate::Array; #[test] fn test_builder_i32_empty() { diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow-array/src/builder/decimal_builder.rs similarity index 94% rename from arrow/src/array/builder/decimal_builder.rs rename to arrow-array/src/builder/decimal_builder.rs index daa30eebed9..096cbec3a6c 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow-array/src/builder/decimal_builder.rs @@ -15,20 +15,15 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::sync::Arc; - -use crate::array::array_decimal::Decimal256Array; -use crate::array::ArrayRef; -use crate::array::Decimal128Array; -use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder}; - -use crate::error::{ArrowError, Result}; - -use crate::datatypes::{ +use crate::builder::{ArrayBuilder, FixedSizeBinaryBuilder}; +use crate::decimal::Decimal256; +use crate::{ArrayRef, Decimal128Array, Decimal256Array}; +use arrow_data::decimal::{ validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, }; -use crate::util::decimal::Decimal256; +use arrow_schema::ArrowError; +use std::any::Any; +use std::sync::Arc; /// Array Builder for [`Decimal128Array`] /// @@ -90,7 +85,7 @@ impl Decimal128Builder { /// Appends a decimal value into the builder. 
#[inline] - pub fn append_value(&mut self, value: impl Into) -> Result<()> { + pub fn append_value(&mut self, value: impl Into) -> Result<(), ArrowError> { let value = value.into(); if self.value_validation { validate_decimal_precision(value, self.precision)? @@ -107,7 +102,10 @@ impl Decimal128Builder { /// Appends an `Option>` into the builder. #[inline] - pub fn append_option(&mut self, value: Option>) -> Result<()> { + pub fn append_option( + &mut self, + value: Option>, + ) -> Result<(), ArrowError> { match value { None => { self.append_null(); @@ -192,7 +190,7 @@ impl Decimal256Builder { /// /// Returns an error if `value` has different precision, scale or length in bytes than this builder #[inline] - pub fn append_value(&mut self, value: &Decimal256) -> Result<()> { + pub fn append_value(&mut self, value: &Decimal256) -> Result<(), ArrowError> { let value = if self.value_validation { let raw_bytes = value.raw_value(); validate_decimal256_precision_with_lt_bytes(raw_bytes, self.precision)?; @@ -225,7 +223,10 @@ impl Decimal256Builder { /// Appends an `Option<&Decimal256>` into the builder. 
#[inline] - pub fn append_option(&mut self, value: Option<&Decimal256>) -> Result<()> { + pub fn append_option( + &mut self, + value: Option<&Decimal256>, + ) -> Result<(), ArrowError> { match value { None => { self.append_null(); @@ -248,13 +249,11 @@ impl Decimal256Builder { #[cfg(test)] mod tests { use super::*; + use crate::decimal::Decimal128; + use crate::Array; + use arrow_schema::DataType; use num::{BigInt, Num}; - use crate::array::array_decimal::Decimal128Array; - use crate::array::{array_decimal, Array}; - use crate::datatypes::DataType; - use crate::util::decimal::{Decimal128, Decimal256}; - #[test] fn test_decimal_builder() { let mut builder = Decimal128Builder::new(38, 6); @@ -377,7 +376,7 @@ mod tests { .expect("should not validate invalid value at builder"); let array = builder.finish(); - let array_data = array_decimal::DecimalArray::data(&array); + let array_data = array.data(); array_data.validate_values().unwrap(); } } diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs similarity index 94% rename from arrow/src/array/builder/fixed_size_binary_builder.rs rename to arrow-array/src/builder/fixed_size_binary_builder.rs index 30c25e0a62b..15b840d0a95 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_builder.rs @@ -15,16 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-use crate::array::{ - ArrayBuilder, ArrayData, ArrayRef, FixedSizeBinaryArray, UInt8BufferBuilder, -}; -use crate::datatypes::DataType; -use crate::error::{ArrowError, Result}; +use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, UInt8BufferBuilder}; +use crate::{ArrayRef, FixedSizeBinaryArray}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; -use super::NullBufferBuilder; - #[derive(Debug)] pub struct FixedSizeBinaryBuilder { values_builder: UInt8BufferBuilder, @@ -58,7 +56,7 @@ impl FixedSizeBinaryBuilder { /// Automatically update the null buffer to delimit the slice appended in as a /// distinct value element. #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> { if self.value_length != value.as_ref().len() as i32 { Err(ArrowError::InvalidArgumentError( "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() @@ -127,9 +125,9 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { mod tests { use super::*; - use crate::array::Array; - use crate::array::FixedSizeBinaryArray; - use crate::datatypes::DataType; + use crate::Array; + use crate::FixedSizeBinaryArray; + use arrow_schema::DataType; #[test] fn test_fixed_size_binary_builder() { diff --git a/arrow/src/array/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs similarity index 95% rename from arrow/src/array/builder/fixed_size_list_builder.rs rename to arrow-array/src/builder/fixed_size_list_builder.rs index da850d15624..e15708ed6c3 100644 --- a/arrow/src/array/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -15,18 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::ArrayBuilder; +use crate::{ArrayRef, FixedSizeListArray}; +use arrow_data::ArrayData; +use arrow_schema::{DataType, Field}; use std::any::Any; use std::sync::Arc; -use crate::array::ArrayData; -use crate::array::ArrayRef; -use crate::array::FixedSizeListArray; -use crate::datatypes::DataType; -use crate::datatypes::Field; - -use super::ArrayBuilder; -use super::NullBufferBuilder; - /// Array builder for [`FixedSizeListArray`] #[derive(Debug)] pub struct FixedSizeListBuilder { @@ -150,9 +146,9 @@ where mod tests { use super::*; - use crate::array::Array; - use crate::array::Int32Array; - use crate::array::Int32Builder; + use crate::builder::Int32Builder; + use crate::Array; + use crate::Int32Array; #[test] fn test_fixed_size_list_array_builder() { diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow-array/src/builder/generic_binary_builder.rs similarity index 97% rename from arrow/src/array/builder/generic_binary_builder.rs rename to arrow-array/src/builder/generic_binary_builder.rs index 7f83a945343..c806bebf9a0 100644 --- a/arrow/src/array/builder/generic_binary_builder.rs +++ b/arrow-array/src/builder/generic_binary_builder.rs @@ -15,15 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-use crate::array::{ - ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait, - UInt8BufferBuilder, -}; +use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder}; +use crate::{ArrayRef, GenericBinaryArray, OffsetSizeTrait}; +use arrow_data::ArrayDataBuilder; use std::any::Any; use std::sync::Arc; -use super::{BufferBuilder, NullBufferBuilder}; - /// Array builder for [`GenericBinaryArray`] #[derive(Debug)] pub struct GenericBinaryBuilder { diff --git a/arrow/src/array/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs similarity index 96% rename from arrow/src/array/builder/generic_list_builder.rs rename to arrow-array/src/builder/generic_list_builder.rs index 1beda711417..3f5892ff037 100644 --- a/arrow/src/array/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -15,17 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, BufferBuilder}; +use crate::{ArrayRef, GenericListArray, OffsetSizeTrait}; +use arrow_data::ArrayData; +use arrow_schema::Field; use std::any::Any; use std::sync::Arc; -use crate::array::ArrayData; -use crate::array::ArrayRef; -use crate::array::GenericListArray; -use crate::array::OffsetSizeTrait; -use crate::datatypes::Field; - -use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; - /// Array builder for [`GenericListArray`] #[derive(Debug)] pub struct GenericListBuilder { @@ -155,10 +152,10 @@ where #[cfg(test)] mod tests { use super::*; - use crate::array::builder::ListBuilder; - use crate::array::{Array, Int32Array, Int32Builder}; - use crate::buffer::Buffer; - use crate::datatypes::DataType; + use crate::builder::{Int32Builder, ListBuilder}; + use crate::{Array, Int32Array}; + use arrow_buffer::Buffer; + use arrow_schema::DataType; fn _test_generic_list_array_builder() { let values_builder = Int32Builder::with_capacity(10); diff --git a/arrow/src/array/builder/generic_string_builder.rs b/arrow-array/src/builder/generic_string_builder.rs similarity index 96% rename from arrow/src/array/builder/generic_string_builder.rs rename to arrow-array/src/builder/generic_string_builder.rs index f36e499b846..f766b6f55f2 100644 --- a/arrow/src/array/builder/generic_string_builder.rs +++ b/arrow-array/src/builder/generic_string_builder.rs @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-use crate::array::{Array, ArrayBuilder, ArrayRef, GenericStringArray, OffsetSizeTrait}; +use crate::builder::{ArrayBuilder, GenericBinaryBuilder}; +use crate::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait}; use std::any::Any; use std::sync::Arc; -use super::GenericBinaryBuilder; - /// Array builder for [`GenericStringArray`] #[derive(Debug)] pub struct GenericStringBuilder { @@ -134,7 +133,8 @@ impl ArrayBuilder for GenericStringBuilder() { let mut builder = GenericStringBuilder::::new(); diff --git a/arrow/src/array/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs similarity index 93% rename from arrow/src/array/builder/map_builder.rs rename to arrow-array/src/builder/map_builder.rs index 766e8a56b38..78f49550071 100644 --- a/arrow/src/array/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -15,20 +15,14 @@ // specific language governing permissions and limitations // under the License. +use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, BufferBuilder}; +use crate::{Array, ArrayRef, MapArray, StructArray}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; use std::sync::Arc; -use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; -use crate::array::array::Array; -use crate::array::ArrayData; -use crate::array::ArrayRef; -use crate::array::MapArray; -use crate::array::StructArray; -use crate::datatypes::DataType; -use crate::datatypes::Field; -use crate::error::ArrowError; -use crate::error::Result; - #[derive(Debug)] pub struct MapBuilder { offsets_builder: BufferBuilder, @@ -96,7 +90,7 @@ impl MapBuilder { /// /// Returns an error if the key and values builders are in an inconsistent state. 
#[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { + pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> { if self.key_builder.len() != self.value_builder.len() { return Err(ArrowError::InvalidArgumentError(format!( "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}", @@ -189,11 +183,10 @@ impl ArrayBuilder for MapBuilder { #[cfg(test)] mod tests { use super::*; + use arrow_buffer::Buffer; + use arrow_data::Bitmap; - use crate::array::builder::StringBuilder; - use crate::array::Int32Builder; - use crate::bitmap::Bitmap; - use crate::buffer::Buffer; + use crate::builder::{Int32Builder, StringBuilder}; // TODO: add a test that finishes building, after designing a spec-compliant // way of inserting values to the map. diff --git a/arrow/src/array/builder/mod.rs b/arrow-array/src/builder/mod.rs similarity index 73% rename from arrow/src/array/builder/mod.rs rename to arrow-array/src/builder/mod.rs index c02acb32653..41583e1219d 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -15,63 +15,56 @@ // specific language governing permissions and limitations // under the License. -//! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable -//! of creating a [`Buffer`](crate::buffer::Buffer) which can be used -//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData) +//! Defines a [`BufferBuilder`](crate::builder::BufferBuilder) capable +//! of creating a [`Buffer`](arrow_buffer::Buffer) which can be used +//! as an internal buffer in an [`ArrayData`](arrow_data::ArrayData) //! object. 
mod boolean_buffer_builder; +pub use boolean_buffer_builder::*; + mod boolean_builder; +pub use boolean_builder::*; mod buffer_builder; +pub use buffer_builder::*; mod decimal_builder; +pub use decimal_builder::*; mod fixed_size_binary_builder; +pub use fixed_size_binary_builder::*; mod fixed_size_list_builder; +pub use fixed_size_list_builder::*; mod generic_binary_builder; +pub use generic_binary_builder::*; mod generic_list_builder; +pub use generic_list_builder::*; mod generic_string_builder; +pub use generic_string_builder::*; mod map_builder; +pub use map_builder::*; mod null_buffer_builder; mod primitive_builder; +pub use primitive_builder::*; mod primitive_dictionary_builder; +pub use primitive_dictionary_builder::*; mod string_dictionary_builder; +pub use string_dictionary_builder::*; mod struct_builder; +pub use struct_builder::*; mod union_builder; +pub use union_builder::*; +use crate::ArrayRef; use std::any::Any; -use std::marker::PhantomData; -use std::ops::Range; - -use super::ArrayRef; - -pub use boolean_buffer_builder::BooleanBufferBuilder; -pub use boolean_builder::BooleanBuilder; -pub use buffer_builder::BufferBuilder; -pub use decimal_builder::Decimal128Builder; -pub use decimal_builder::Decimal256Builder; -pub use fixed_size_binary_builder::FixedSizeBinaryBuilder; -pub use fixed_size_list_builder::FixedSizeListBuilder; -pub use generic_binary_builder::GenericBinaryBuilder; -pub use generic_list_builder::GenericListBuilder; -pub use generic_string_builder::GenericStringBuilder; -pub use map_builder::{MapBuilder, MapFieldNames}; -use null_buffer_builder::NullBufferBuilder; -pub use primitive_builder::PrimitiveBuilder; -pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder; -pub use string_dictionary_builder::StringDictionaryBuilder; -pub use struct_builder::{make_builder, StructBuilder}; -pub use union_builder::UnionBuilder; /// Trait for dealing with different array builders at runtime /// /// # Example /// /// ``` -/// # use arrow::{ 
-/// # array::{ArrayBuilder, ArrayRef, Float64Builder, Int64Builder, StringArray, StringBuilder}, -/// # error::ArrowError, -/// # }; -/// # fn main() -> std::result::Result<(), ArrowError> { /// // Create +/// # use arrow_array::{ArrayRef, StringArray}; +/// # use arrow_array::builder::{ArrayBuilder, Float64Builder, Int64Builder, StringBuilder}; +/// /// let mut data_builders: Vec> = vec![ /// Box::new(Float64Builder::new()), /// Box::new(Int64Builder::new()), @@ -110,8 +103,6 @@ pub use union_builder::UnionBuilder; /// .value(0), /// "🍎" /// ); -/// # Ok(()) -/// # } /// ``` pub trait ArrayBuilder: Any + Send { /// Returns the number of array slots in the builder diff --git a/arrow/src/array/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs similarity index 98% rename from arrow/src/array/builder/null_buffer_builder.rs rename to arrow-array/src/builder/null_buffer_builder.rs index ef2e4c50ab9..b2aa622ca7a 100644 --- a/arrow/src/array/builder/null_buffer_builder.rs +++ b/arrow-array/src/builder/null_buffer_builder.rs @@ -15,9 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::buffer::Buffer; - -use super::BooleanBufferBuilder; +use crate::builder::BooleanBufferBuilder; +use arrow_buffer::Buffer; /// Builder for creating the null bit buffer. /// This builder only materializes the buffer when we append `false`. diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs similarity index 84% rename from arrow/src/array/builder/primitive_builder.rs rename to arrow-array/src/builder/primitive_builder.rs index 38c8b447147..c5b8c955707 100644 --- a/arrow/src/array/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -15,15 +15,42 @@ // specific language governing permissions and limitations // under the License. 
+use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::{ArrayBuilder, BufferBuilder}; +use crate::types::*; +use crate::{ArrayRef, ArrowPrimitiveType, PrimitiveArray}; +use arrow_data::ArrayData; use std::any::Any; use std::sync::Arc; -use crate::array::ArrayData; -use crate::array::ArrayRef; -use crate::array::PrimitiveArray; -use crate::datatypes::ArrowPrimitiveType; - -use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; +pub type Int8Builder = PrimitiveBuilder; +pub type Int16Builder = PrimitiveBuilder; +pub type Int32Builder = PrimitiveBuilder; +pub type Int64Builder = PrimitiveBuilder; +pub type UInt8Builder = PrimitiveBuilder; +pub type UInt16Builder = PrimitiveBuilder; +pub type UInt32Builder = PrimitiveBuilder; +pub type UInt64Builder = PrimitiveBuilder; +pub type Float32Builder = PrimitiveBuilder; +pub type Float64Builder = PrimitiveBuilder; + +pub type TimestampSecondBuilder = PrimitiveBuilder; +pub type TimestampMillisecondBuilder = PrimitiveBuilder; +pub type TimestampMicrosecondBuilder = PrimitiveBuilder; +pub type TimestampNanosecondBuilder = PrimitiveBuilder; +pub type Date32Builder = PrimitiveBuilder; +pub type Date64Builder = PrimitiveBuilder; +pub type Time32SecondBuilder = PrimitiveBuilder; +pub type Time32MillisecondBuilder = PrimitiveBuilder; +pub type Time64MicrosecondBuilder = PrimitiveBuilder; +pub type Time64NanosecondBuilder = PrimitiveBuilder; +pub type IntervalYearMonthBuilder = PrimitiveBuilder; +pub type IntervalDayTimeBuilder = PrimitiveBuilder; +pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder; +pub type DurationSecondBuilder = PrimitiveBuilder; +pub type DurationMillisecondBuilder = PrimitiveBuilder; +pub type DurationMicrosecondBuilder = PrimitiveBuilder; +pub type DurationNanosecondBuilder = PrimitiveBuilder; /// Array builder for fixed-width primitive types #[derive(Debug)] @@ -179,14 +206,14 @@ impl PrimitiveBuilder { #[cfg(test)] mod tests { use super::*; + use arrow_buffer::Buffer; 
use crate::array::Array; use crate::array::BooleanArray; use crate::array::Date32Array; use crate::array::Int32Array; - use crate::array::Int32Builder; use crate::array::TimestampSecondArray; - use crate::buffer::Buffer; + use crate::builder::Int32Builder; #[test] fn test_primitive_array_builder_i32() { diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs similarity index 92% rename from arrow/src/array/builder/primitive_dictionary_builder.rs rename to arrow-array/src/builder/primitive_dictionary_builder.rs index 0fd41a181f5..c43416e5af3 100644 --- a/arrow/src/array/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. +use crate::builder::{ArrayBuilder, PrimitiveBuilder}; +use crate::{Array, ArrayRef, ArrowPrimitiveType, DictionaryArray}; +use arrow_buffer::{ArrowNativeType, ToByteSlice}; +use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::Arc; -use crate::array::{Array, ArrayRef, ArrowPrimitiveType, DictionaryArray}; -use crate::datatypes::{ArrowNativeType, DataType, ToByteSlice}; -use crate::error::{ArrowError, Result}; - -use super::ArrayBuilder; -use super::PrimitiveBuilder; - /// Wraps a type implementing `ToByteSlice` implementing `Hash` and `Eq` for it /// /// This is necessary to handle types such as f32, which don't natively implement these @@ -54,13 +51,12 @@ impl Eq for Value {} /// # Example: /// /// ``` -/// use arrow::array::{ -/// Array, PrimitiveBuilder, PrimitiveDictionaryBuilder, -/// UInt8Array, UInt32Array, -/// }; -/// use arrow::datatypes::{UInt8Type, UInt32Type}; /// -/// let mut builder = PrimitiveDictionaryBuilder::::new(); +/// # use arrow_array::builder::PrimitiveDictionaryBuilder; +/// # use arrow_array::types::{UInt32Type, 
UInt8Type}; +/// # use arrow_array::{Array, UInt32Array, UInt8Array}; +/// +/// let mut builder = PrimitiveDictionaryBuilder::::new(); /// builder.append(12345678).unwrap(); /// builder.append_null(); /// builder.append(22345678).unwrap(); @@ -175,7 +171,7 @@ where /// if already present in the values array or a new index if the /// value is appended to the values array. #[inline] - pub fn append(&mut self, value: V::Native) -> Result { + pub fn append(&mut self, value: V::Native) -> Result { let key = match self.map.entry(Value(value)) { Entry::Vacant(vacant) => { // Append new value. @@ -223,8 +219,7 @@ mod tests { use crate::array::Array; use crate::array::UInt32Array; use crate::array::UInt8Array; - use crate::datatypes::UInt32Type; - use crate::datatypes::UInt8Type; + use crate::types::{UInt32Type, UInt8Type}; #[test] fn test_primitive_dictionary_builder() { diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs similarity index 94% rename from arrow/src/array/builder/string_dictionary_builder.rs rename to arrow-array/src/builder/string_dictionary_builder.rs index 3816e0be1dd..bab17d4a9f6 100644 --- a/arrow/src/array/builder/string_dictionary_builder.rs +++ b/arrow-array/src/builder/string_dictionary_builder.rs @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-use super::PrimitiveBuilder; -use crate::array::{ - Array, ArrayBuilder, ArrayRef, DictionaryArray, StringArray, StringBuilder, -}; -use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType}; -use crate::error::{ArrowError, Result}; +use crate::builder::{ArrayBuilder, PrimitiveBuilder, StringBuilder}; +use crate::types::ArrowDictionaryKeyType; +use crate::{Array, ArrayRef, DictionaryArray, StringArray}; +use arrow_buffer::ArrowNativeType; +use arrow_schema::{ArrowError, DataType}; use hashbrown::hash_map::RawEntryMut; use hashbrown::HashMap; use std::any::Any; @@ -31,17 +30,13 @@ use std::sync::Arc; /// arrays or result in an ordered dictionary. /// /// ``` -/// use arrow::{ -/// array::{ -/// Int8Array, StringArray, -/// PrimitiveBuilder, StringBuilder, StringDictionaryBuilder, -/// }, -/// datatypes::Int8Type, -/// }; -/// /// // Create a dictionary array indexed by bytes whose values are Strings. /// // It can thus hold up to 256 distinct string values. /// +/// # use arrow_array::builder::StringDictionaryBuilder; +/// # use arrow_array::{Int8Array, StringArray}; +/// # use arrow_array::types::Int8Type; +/// /// let mut builder = StringDictionaryBuilder::::new(); /// /// // The builder builds the dictionary value by value @@ -132,9 +127,8 @@ where /// # Example /// /// ``` - /// use arrow::datatypes::Int16Type; - /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array}; - /// use std::convert::TryFrom; + /// # use arrow_array::builder::StringDictionaryBuilder; + /// # use arrow_array::{Int16Array, StringArray}; /// /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); /// @@ -152,7 +146,7 @@ where pub fn new_with_dictionary( keys_capacity: usize, dictionary_values: &StringArray, - ) -> Result { + ) -> Result { let state = ahash::RandomState::default(); let dict_len = dictionary_values.len(); @@ -239,7 +233,7 @@ where /// value is appended to the values array. 
/// /// Returns an error if the new index would overflow the key type. - pub fn append(&mut self, value: impl AsRef) -> Result { + pub fn append(&mut self, value: impl AsRef) -> Result { let value = value.as_ref(); let state = &self.state; @@ -312,8 +306,7 @@ mod tests { use crate::array::Array; use crate::array::Int8Array; - use crate::datatypes::Int16Type; - use crate::datatypes::Int8Type; + use crate::types::{Int16Type, Int8Type}; #[test] fn test_string_dictionary_builder() { diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs similarity index 97% rename from arrow/src/array/builder/struct_builder.rs rename to arrow-array/src/builder/struct_builder.rs index c5db09119e0..cadc8a529f5 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -15,17 +15,14 @@ // specific language governing permissions and limitations // under the License. +use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::*; +use crate::{Array, ArrayRef, StructArray}; +use arrow_data::ArrayData; +use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit}; use std::any::Any; -use std::fmt; use std::sync::Arc; -use crate::array::builder::decimal_builder::Decimal128Builder; -use crate::array::*; -use crate::datatypes::DataType; -use crate::datatypes::Field; - -use super::NullBufferBuilder; - /// Array builder for Struct types. 
/// /// Note that callers should make sure that methods of all the child field builders are @@ -36,8 +33,8 @@ pub struct StructBuilder { null_buffer_builder: NullBufferBuilder, } -impl fmt::Debug for StructBuilder { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Debug for StructBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StructBuilder") .field("fields", &self.fields) .field("bitmap_builder", &self.null_buffer_builder) @@ -94,6 +91,7 @@ impl ArrayBuilder for StructBuilder { /// This function is useful to construct arrays from an arbitrary vectors with known/expected /// schema. pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { + use crate::builder::*; match datatype { DataType::Null => unimplemented!(), DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)), @@ -218,7 +216,7 @@ impl StructBuilder { let mut child_data = Vec::with_capacity(self.field_builders.len()); for f in &mut self.field_builders { let arr = f.finish(); - child_data.push(arr.into_data()); + child_data.push(arr.data().clone()); } let length = self.len(); let null_bit_buffer = self.null_buffer_builder.finish(); @@ -248,10 +246,10 @@ impl StructBuilder { #[cfg(test)] mod tests { use super::*; + use arrow_buffer::Buffer; + use arrow_data::Bitmap; use crate::array::Array; - use crate::bitmap::Bitmap; - use crate::buffer::Buffer; #[test] fn test_struct_array_builder() { diff --git a/arrow/src/array/builder/union_builder.rs b/arrow-array/src/builder/union_builder.rs similarity index 92% rename from arrow/src/array/builder/union_builder.rs rename to arrow-array/src/builder/union_builder.rs index c0ae76853dd..def1e1eca06 100644 --- a/arrow/src/array/builder/union_builder.rs +++ b/arrow-array/src/builder/union_builder.rs @@ -15,24 +15,16 @@ // specific language governing permissions and limitations // under the License. 
+use crate::builder::buffer_builder::{Int32BufferBuilder, Int8BufferBuilder}; +use crate::builder::null_buffer_builder::NullBufferBuilder; +use crate::builder::BufferBuilder; +use crate::{make_array, ArrowPrimitiveType, UnionArray}; +use arrow_buffer::{ArrowNativeType, Buffer}; +use arrow_data::ArrayDataBuilder; +use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; use std::collections::HashMap; -use crate::array::ArrayDataBuilder; -use crate::array::Int32BufferBuilder; -use crate::array::Int8BufferBuilder; -use crate::array::UnionArray; -use crate::buffer::Buffer; - -use crate::datatypes::DataType; -use crate::datatypes::Field; -use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType}; -use crate::error::{ArrowError, Result}; - -use super::{BufferBuilder, NullBufferBuilder}; - -use crate::array::make_array; - /// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`. #[derive(Debug)] struct FieldData { @@ -112,8 +104,8 @@ impl FieldData { /// Example: **Dense Memory Layout** /// /// ``` -/// use arrow::array::UnionBuilder; -/// use arrow::datatypes::{Float64Type, Int32Type}; +/// # use arrow_array::builder::UnionBuilder; +/// # use arrow_array::types::{Float64Type, Int32Type}; /// /// let mut builder = UnionBuilder::new_dense(); /// builder.append::("a", 1).unwrap(); @@ -132,8 +124,8 @@ impl FieldData { /// /// Example: **Sparse Memory Layout** /// ``` -/// use arrow::array::UnionBuilder; -/// use arrow::datatypes::{Float64Type, Int32Type}; +/// # use arrow_array::builder::UnionBuilder; +/// # use arrow_array::types::{Float64Type, Int32Type}; /// /// let mut builder = UnionBuilder::new_sparse(); /// builder.append::("a", 1).unwrap(); @@ -203,7 +195,10 @@ impl UnionBuilder { /// is part of the final array, appending a NULL requires /// specifying which field (child) to use. 
#[inline] - pub fn append_null(&mut self, type_name: &str) -> Result<()> { + pub fn append_null( + &mut self, + type_name: &str, + ) -> Result<(), ArrowError> { self.append_option::(type_name, None) } @@ -213,7 +208,7 @@ impl UnionBuilder { &mut self, type_name: &str, v: T::Native, - ) -> Result<()> { + ) -> Result<(), ArrowError> { self.append_option::(type_name, Some(v)) } @@ -221,7 +216,7 @@ impl UnionBuilder { &mut self, type_name: &str, v: Option, - ) -> Result<()> { + ) -> Result<(), ArrowError> { let type_name = type_name.to_string(); let mut field_data = match self.fields.remove(&type_name) { @@ -278,7 +273,7 @@ impl UnionBuilder { } /// Builds this builder creating a new `UnionArray`. - pub fn build(mut self) -> Result { + pub fn build(mut self) -> Result { let type_id_buffer = self.type_id_builder.finish(); let value_offsets_buffer = self.value_offset_builder.map(|mut b| b.finish()); let mut children = Vec::new(); diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs new file mode 100644 index 00000000000..0556d30deac --- /dev/null +++ b/arrow-array/src/cast.rs @@ -0,0 +1,767 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Defines helper functions for force [`Array`] downcasts + +use crate::array::*; +use crate::types::*; + +/// Downcast an [`Array`] to a [`PrimitiveArray`] based on its [`DataType`] +/// accepts a number of subsequent patterns to match the data type +/// +/// ``` +/// # use arrow_array::{Array, downcast_primitive_array, cast::as_string_array}; +/// # use arrow_schema::DataType; +/// +/// fn print_primitive(array: &dyn Array) { +/// downcast_primitive_array!( +/// array => { +/// for v in array { +/// println!("{:?}", v); +/// } +/// } +/// DataType::Utf8 => { +/// for v in as_string_array(array) { +/// println!("{:?}", v); +/// } +/// } +/// t => println!("Unsupported datatype {}", t) +/// ) +/// } +/// ``` +/// +/// [`DataType`]: arrow_schema::DataType +#[macro_export] +macro_rules! downcast_primitive_array { + ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => { + downcast_primitive_array!($values => {$e} $($p => $fallback)*) + }; + + ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => { + match $values.data_type() { + arrow_schema::DataType::Int8 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Int8Type, + >($values); + $e + } + arrow_schema::DataType::Int16 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Int16Type, + >($values); + $e + } + arrow_schema::DataType::Int32 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Int32Type, + >($values); + $e + } + arrow_schema::DataType::Int64 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Int64Type, + >($values); + $e + } + arrow_schema::DataType::UInt8 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::UInt8Type, + >($values); + $e + } + arrow_schema::DataType::UInt16 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::UInt16Type, + >($values); + $e + } + arrow_schema::DataType::UInt32 => { + let $values = $crate::cast::as_primitive_array::< + 
$crate::types::UInt32Type, + >($values); + $e + } + arrow_schema::DataType::UInt64 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::UInt64Type, + >($values); + $e + } + arrow_schema::DataType::Float16 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Float16Type, + >($values); + $e + } + arrow_schema::DataType::Float32 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Float32Type, + >($values); + $e + } + arrow_schema::DataType::Float64 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Float64Type, + >($values); + $e + } + arrow_schema::DataType::Date32 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Date32Type, + >($values); + $e + } + arrow_schema::DataType::Date64 => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Date64Type, + >($values); + $e + } + arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Second) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Time32SecondType, + >($values); + $e + } + arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Millisecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Time32MillisecondType, + >($values); + $e + } + arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Microsecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Time64MicrosecondType, + >($values); + $e + } + arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Nanosecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::Time64NanosecondType, + >($values); + $e + } + arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Second, _) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::TimestampSecondType, + >($values); + $e + } + arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, _) => { + let $values = $crate::cast::as_primitive_array::< + 
$crate::types::TimestampMillisecondType, + >($values); + $e + } + arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, _) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::TimestampMicrosecondType, + >($values); + $e + } + arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::TimestampNanosecondType, + >($values); + $e + } + arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::YearMonth) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::IntervalYearMonthType, + >($values); + $e + } + arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::DayTime) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::IntervalDayTimeType, + >($values); + $e + } + arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::IntervalMonthDayNanoType, + >($values); + $e + } + arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Second) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::DurationSecondType, + >($values); + $e + } + arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Millisecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::DurationMillisecondType, + >($values); + $e + } + arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Microsecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::DurationMicrosecondType, + >($values); + $e + } + arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Nanosecond) => { + let $values = $crate::cast::as_primitive_array::< + $crate::types::DurationNanosecondType, + >($values); + $e + } + $($p => $fallback,)* + } + }; + + (($values1:ident, $values2:ident) => $e:block $($p:pat => $fallback:expr $(,)*)*) => { + match ($values1.data_type(), $values2.data_type()) { + 
(arrow_schema::DataType::Int8, arrow_schema::DataType::Int8) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Int8Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Int8Type, + >($values2); + $e + } + (arrow_schema::DataType::Int16, arrow_schema::DataType::Int16) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Int16Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Int16Type, + >($values2); + $e + } + (arrow_schema::DataType::Int32, arrow_schema::DataType::Int32) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Int32Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Int32Type, + >($values2); + $e + } + (arrow_schema::DataType::Int64, arrow_schema::DataType::Int64) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Int64Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Int64Type, + >($values2); + $e + } + (arrow_schema::DataType::UInt8, arrow_schema::DataType::UInt8) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::UInt8Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::UInt8Type, + >($values2); + $e + } + (arrow_schema::DataType::UInt16, arrow_schema::DataType::UInt16) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::UInt16Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::UInt16Type, + >($values2); + $e + } + (arrow_schema::DataType::UInt32, arrow_schema::DataType::UInt32) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::UInt32Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::UInt32Type, + >($values2); + $e + } + (arrow_schema::DataType::UInt64, arrow_schema::DataType::UInt64) => { + let $values1 = 
$crate::cast::as_primitive_array::< + $crate::types::UInt64Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::UInt64Type, + >($values2); + $e + } + (arrow_schema::DataType::Float32, arrow_schema::DataType::Float32) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Float32Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Float32Type, + >($values2); + $e + } + (arrow_schema::DataType::Float64, arrow_schema::DataType::Float64) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Float64Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Float64Type, + >($values2); + $e + } + (arrow_schema::DataType::Date32, arrow_schema::DataType::Date32) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Date32Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Date32Type, + >($values2); + $e + } + (arrow_schema::DataType::Date64, arrow_schema::DataType::Date64) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Date64Type, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Date64Type, + >($values2); + $e + } + (arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Second), arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Second)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Time32SecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Time32SecondType, + >($values2); + $e + } + (arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Millisecond), arrow_schema::DataType::Time32(arrow_schema::TimeUnit::Millisecond)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Time32MillisecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Time32MillisecondType, + 
>($values2); + $e + } + (arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Microsecond), arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Microsecond)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Time64MicrosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Time64MicrosecondType, + >($values2); + $e + } + (arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Nanosecond), arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Nanosecond)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::Time64NanosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::Time64NanosecondType, + >($values2); + $e + } + (arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Second, _), arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Second, _)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::TimestampSecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::TimestampSecondType, + >($values2); + $e + } + (arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, _), arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, _)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::TimestampMillisecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::TimestampMillisecondType, + >($values2); + $e + } + (arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, _), arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, _)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::TimestampMicrosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::TimestampMicrosecondType, + >($values2); + $e + } + (arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _), 
arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::TimestampNanosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::TimestampNanosecondType, + >($values2); + $e + } + (arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::YearMonth), arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::YearMonth)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::IntervalYearMonthType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::IntervalYearMonthType, + >($values2); + $e + } + (arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::DayTime), arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::DayTime)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::IntervalDayTimeType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::IntervalDayTimeType, + >($values2); + $e + } + (arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano), arrow_schema::DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::IntervalMonthDayNanoType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::IntervalMonthDayNanoType, + >($values2); + $e + } + (arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Second), arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Second)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::DurationSecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::DurationSecondType, + >($values2); + $e + } + (arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Millisecond), arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Millisecond)) => { + let $values1 = 
$crate::cast::as_primitive_array::< + $crate::types::DurationMillisecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::DurationMillisecondType, + >($values2); + $e + } + (arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Microsecond), arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Microsecond)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::DurationMicrosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::DurationMicrosecondType, + >($values2); + $e + } + (arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Nanosecond), arrow_schema::DataType::Duration(arrow_schema::TimeUnit::Nanosecond)) => { + let $values1 = $crate::cast::as_primitive_array::< + $crate::types::DurationNanosecondType, + >($values1); + let $values2 = $crate::cast::as_primitive_array::< + $crate::types::DurationNanosecondType, + >($values2); + $e + } + $($p => $fallback,)* + } + }; +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`], to +/// [`PrimitiveArray`], panic'ing on failure. 
+/// +/// # Example +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, Int32Array}; +/// # use arrow_array::cast::as_primitive_array; +/// # use arrow_array::types::Int32Type; +/// +/// let arr: ArrayRef = Arc::new(Int32Array::from(vec![Some(1)])); +/// +/// // Downcast an `ArrayRef` to Int32Array / PrimitiveArray: +/// let primitive_array: &Int32Array = as_primitive_array(&arr); +/// +/// // Equivalently: +/// let primitive_array = as_primitive_array::<Int32Type>(&arr); +/// +/// // This is the equivalent of: +/// let primitive_array = arr +/// .as_any() +/// .downcast_ref::<Int32Array>() +/// .unwrap(); +/// ``` + +pub fn as_primitive_array(arr: &dyn Array) -> &PrimitiveArray +where + T: ArrowPrimitiveType, +{ + arr.as_any() + .downcast_ref::>() + .expect("Unable to downcast to primitive array") +} + +/// Downcast an [`Array`] to a [`DictionaryArray`] based on its [`DataType`], accepts +/// a number of subsequent patterns to match the data type +/// +/// ``` +/// # use arrow_array::{Array, StringArray, downcast_dictionary_array, cast::as_string_array}; +/// # use arrow_schema::DataType; +/// +/// fn print_strings(array: &dyn Array) { +/// downcast_dictionary_array!( +/// array => match array.values().data_type() { +/// DataType::Utf8 => { +/// for v in array.downcast_dict::<StringArray>().unwrap() { +/// println!("{:?}", v); +/// } +/// } +/// t => println!("Unsupported dictionary value type {}", t), +/// }, +/// DataType::Utf8 => { +/// for v in as_string_array(array) { +/// println!("{:?}", v); +/// } +/// } +/// t => println!("Unsupported datatype {}", t) +/// ) +/// } +/// ``` +/// +/// [`DataType`]: arrow_schema::DataType +#[macro_export] +macro_rules! 
downcast_dictionary_array { + ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => { + downcast_dictionary_array!($values => {$e} $($p => $fallback)*) + }; + + ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => { + match $values.data_type() { + arrow_schema::DataType::Dictionary(k, _) => match k.as_ref() { + arrow_schema::DataType::Int8 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::Int8Type, + >($values); + $e + }, + arrow_schema::DataType::Int16 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::Int16Type, + >($values); + $e + }, + arrow_schema::DataType::Int32 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::Int32Type, + >($values); + $e + }, + arrow_schema::DataType::Int64 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::Int64Type, + >($values); + $e + }, + arrow_schema::DataType::UInt8 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::UInt8Type, + >($values); + $e + }, + arrow_schema::DataType::UInt16 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::UInt16Type, + >($values); + $e + }, + arrow_schema::DataType::UInt32 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::UInt32Type, + >($values); + $e + }, + arrow_schema::DataType::UInt64 => { + let $values = $crate::cast::as_dictionary_array::< + $crate::types::UInt64Type, + >($values); + $e + }, + k => unreachable!("unsupported dictionary key type: {}", k) + } + $($p => $fallback,)* + } + } +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`DictionaryArray`], panic'ing on failure. 
+/// +/// # Example +/// +/// ``` +/// # use arrow_array::{ArrayRef, DictionaryArray}; +/// # use arrow_array::cast::as_dictionary_array; +/// # use arrow_array::types::Int32Type; +/// +/// let arr: DictionaryArray = vec![Some("foo")].into_iter().collect(); +/// let arr: ArrayRef = std::sync::Arc::new(arr); +/// let dict_array: &DictionaryArray = as_dictionary_array::(&arr); +/// ``` +pub fn as_dictionary_array(arr: &dyn Array) -> &DictionaryArray +where + T: ArrowDictionaryKeyType, +{ + arr.as_any() + .downcast_ref::>() + .expect("Unable to downcast to dictionary array") +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`GenericListArray`], panic'ing on failure. +pub fn as_generic_list_array( + arr: &dyn Array, +) -> &GenericListArray { + arr.as_any() + .downcast_ref::>() + .expect("Unable to downcast to list array") +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`ListArray`], panic'ing on failure. +#[inline] +pub fn as_list_array(arr: &dyn Array) -> &ListArray { + as_generic_list_array::(arr) +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`LargeListArray`], panic'ing on failure. +#[inline] +pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray { + as_generic_list_array::(arr) +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`GenericBinaryArray`], panic'ing on failure. +#[inline] +pub fn as_generic_binary_array( + arr: &dyn Array, +) -> &GenericBinaryArray { + arr.as_any() + .downcast_ref::>() + .expect("Unable to downcast to binary array") +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`StringArray`], panic'ing on failure. 
+/// +/// # Example +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::cast::as_string_array; +/// # use arrow_array::{ArrayRef, StringArray}; +/// +/// let arr: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("foo")])); +/// let string_array = as_string_array(&arr); +/// ``` +pub fn as_string_array(arr: &dyn Array) -> &StringArray { + arr.as_any() + .downcast_ref::() + .expect("Unable to downcast to StringArray") +} + +/// Force downcast of an [`Array`], such as an [`ArrayRef`] to +/// [`BooleanArray`], panic'ing on failure. +/// +/// # Example +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, BooleanArray}; +/// # use arrow_array::cast::as_boolean_array; +/// +/// let arr: ArrayRef = Arc::new(BooleanArray::from_iter(vec![Some(true)])); +/// let boolean_array = as_boolean_array(&arr); +/// ``` +pub fn as_boolean_array(arr: &dyn Array) -> &BooleanArray { + arr.as_any() + .downcast_ref::() + .expect("Unable to downcast to BooleanArray") +} + +macro_rules! 
array_downcast_fn { + ($name: ident, $arrty: ty, $arrty_str:expr) => { + #[doc = "Force downcast of an [`Array`], such as an [`ArrayRef`] to "] + #[doc = $arrty_str] + pub fn $name(arr: &dyn Array) -> &$arrty { + arr.as_any().downcast_ref::<$arrty>().expect(concat!( + "Unable to downcast to typed array through ", + stringify!($name) + )) + } + }; + + // use recursive macro to generate dynamic doc string for a given array type + ($name: ident, $arrty: ty) => { + array_downcast_fn!( + $name, + $arrty, + concat!("[`", stringify!($arrty), "`], panic'ing on failure.") + ); + }; +} + +array_downcast_fn!(as_largestring_array, LargeStringArray); +array_downcast_fn!(as_null_array, NullArray); +array_downcast_fn!(as_struct_array, StructArray); +array_downcast_fn!(as_union_array, UnionArray); +array_downcast_fn!(as_map_array, MapArray); +array_downcast_fn!(as_decimal_array, Decimal128Array); + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::*; + + #[test] + fn test_as_decimal_array_ref() { + let array: Decimal128Array = vec![Some(123), None, Some(1111)] + .into_iter() + .collect::() + .with_precision_and_scale(10, 2) + .unwrap(); + assert!(!as_decimal_array(&array).is_empty()); + let result_decimal = as_decimal_array(&array); + assert_eq!(result_decimal, &array); + } + + #[test] + fn test_as_primitive_array_ref() { + let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect(); + assert!(!as_primitive_array::(&array).is_empty()); + + // should also work when wrapped in an Arc + let array: ArrayRef = Arc::new(array); + assert!(!as_primitive_array::(&array).is_empty()); + } + + #[test] + fn test_as_string_array_ref() { + let array: StringArray = vec!["foo", "bar"].into_iter().map(Some).collect(); + assert!(!as_string_array(&array).is_empty()); + + // should also work when wrapped in an Arc + let array: ArrayRef = Arc::new(array); + assert!(!as_string_array(&array).is_empty()) + } +} diff --git a/arrow/src/util/decimal.rs b/arrow-array/src/decimal.rs 
similarity index 96% rename from arrow/src/util/decimal.rs rename to arrow-array/src/decimal.rs index 421942df5c1..605659290c7 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow-array/src/decimal.rs @@ -17,13 +17,10 @@ //! Decimal related utils -use crate::datatypes::{ - DataType, Decimal128Type, Decimal256Type, DecimalType, DECIMAL256_MAX_PRECISION, - DECIMAL_DEFAULT_SCALE, -}; -use crate::error::{ArrowError, Result}; -use num::bigint::BigInt; -use num::Signed; +use crate::types::{Decimal128Type, Decimal256Type, DecimalType}; +use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; +use arrow_schema::{ArrowError, DataType}; +use num::{BigInt, Signed}; use std::cmp::{min, Ordering}; /// [`Decimal`] is the generic representation of a single decimal value @@ -76,7 +73,11 @@ impl Decimal { /// Safety: /// This method doesn't validate if the decimal value represented by the bytes /// can be fitted into the specified precision. - pub fn try_new_from_bytes(precision: u8, scale: u8, bytes: &T::Native) -> Result + pub fn try_new_from_bytes( + precision: u8, + scale: u8, + bytes: &T::Native, + ) -> Result where Self: Sized, { @@ -203,8 +204,7 @@ pub type Decimal128 = Decimal; impl Decimal128 { /// Creates `Decimal128` from an `i128` value. - #[allow(dead_code)] - pub(crate) fn new_from_i128(precision: u8, scale: u8, value: i128) -> Self { + pub fn new_from_i128(precision: u8, scale: u8, value: i128) -> Self { Decimal128 { precision, scale, @@ -230,7 +230,11 @@ pub type Decimal256 = Decimal; impl Decimal256 { /// Constructs a `Decimal256` value from a `BigInt`. - pub fn from_big_int(num: &BigInt, precision: u8, scale: u8) -> Result { + pub fn from_big_int( + num: &BigInt, + precision: u8, + scale: u8, + ) -> Result { let mut bytes = if num.is_negative() { [255_u8; 32] } else { @@ -242,7 +246,7 @@ impl Decimal256 { } /// Constructs a `BigInt` from this `Decimal256` value. 
- pub(crate) fn to_big_int(self) -> BigInt { + pub fn to_big_int(self) -> BigInt { BigInt::from_signed_bytes_le(&self.value) } } diff --git a/arrow/src/datatypes/delta.rs b/arrow-array/src/delta.rs similarity index 100% rename from arrow/src/datatypes/delta.rs rename to arrow-array/src/delta.rs diff --git a/arrow/src/array/iterator.rs b/arrow-array/src/iterator.rs similarity index 88% rename from arrow/src/array/iterator.rs rename to arrow-array/src/iterator.rs index e64712fa883..8f8e27998b0 100644 --- a/arrow/src/array/iterator.rs +++ b/arrow-array/src/iterator.rs @@ -15,14 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::array::array::ArrayAccessor; -use crate::array::{DecimalArray, FixedSizeBinaryArray}; -use crate::datatypes::{Decimal128Type, Decimal256Type}; - -use super::{ - BooleanArray, GenericBinaryArray, GenericListArray, GenericStringArray, - PrimitiveArray, +use crate::array::{ + ArrayAccessor, BooleanArray, DecimalArray, FixedSizeBinaryArray, GenericBinaryArray, + GenericListArray, GenericStringArray, PrimitiveArray, }; +use crate::types::{Decimal128Type, Decimal256Type}; /// An iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`] /// @@ -36,39 +33,14 @@ use super::{ /// on every index of the array, and handle the null mask separately. For [`PrimitiveArray`] /// this functionality is provided by [`compute::unary`] /// -/// ``` -/// # use arrow::array::PrimitiveArray; -/// # use arrow::compute::unary; -/// # use arrow::datatypes::Int32Type; -/// -/// fn add(a: &PrimitiveArray, b: i32) -> PrimitiveArray { -/// unary(a, |a| a + b) -/// } -/// ``` -/// /// If performing a fallible operation, it isn't possible to perform the operation independently /// of the null mask, as this might result in a spurious failure on a null index. 
However, /// there are more efficient ways to iterate over just the non-null indices, this functionality /// is provided by [`compute::try_unary`] /// -/// ``` -/// # use arrow::array::PrimitiveArray; -/// # use arrow::compute::try_unary; -/// # use arrow::datatypes::Int32Type; -/// # use arrow::error::{ArrowError, Result}; -/// -/// fn checked_add(a: &PrimitiveArray, b: i32) -> Result> { -/// try_unary(a, |a| { -/// a.checked_add(b).ok_or_else(|| { -/// ArrowError::CastError(format!("overflow adding {} to {}", a, b)) -/// }) -/// }) -/// } -/// ``` -/// -/// [`PrimitiveArray`]: [crate::array::PrimitiveArray] -/// [`compute::unary`]: [crate::compute::unary] -/// [`compute::try_unary`]: [crate::compute::try_unary] +/// [`PrimitiveArray`]: crate::PrimitiveArray +/// [`compute::unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.unary.html +/// [`compute::try_unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.try_unary.html #[derive(Debug)] pub struct ArrayIter { array: T, diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs new file mode 100644 index 00000000000..bd7549a193b --- /dev/null +++ b/arrow-array/src/lib.rs @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
The central types in Apache Arrow are arrays, which are known-length sequences of values +//! all having the same type. This module provides concrete implementations of each type, as +//! well as an [`Array`] trait that can be used for type-erasure. +//! +//! # Downcasting an Array +//! +//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or [`ArrayRef`]. +//! For example, [`RecordBatch`](`crate::RecordBatch`) stores columns as [`ArrayRef`]. +//! +//! Whilst these arrays can be passed directly to the [`compute`], [`csv`], [`json`], etc... APIs, +//! it is often the case that you wish to interact with the data directly. +//! +//! This requires downcasting to the concrete type of the array: +//! +//! ``` +//! # use arrow_array::{Array, Float32Array, Int32Array}; +//! +//! fn sum_int32(array: &dyn Array) -> i32 { +//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap(); +//! integers.iter().map(|val| val.unwrap_or_default()).sum() +//! } +//! +//! // Note: the values for positions corresponding to nulls will be arbitrary +//! fn as_f32_slice(array: &dyn Array) -> &[f32] { +//! array.as_any().downcast_ref::<Float32Array>().unwrap().values() +//! } +//! ``` +//! +//! Additionally, there are convenient functions to do this casting +//! such as [`cast::as_primitive_array`] and [`cast::as_string_array`]: +//! +//! ``` +//! # use arrow_array::Array; +//! # use arrow_array::cast::as_primitive_array; +//! # use arrow_array::types::Float32Type; +//! +//! fn as_f32_slice(array: &dyn Array) -> &[f32] { +//! // use as_primitive_array +//! as_primitive_array::<Float32Type>(array).values() +//! } +//! ``` + +//! # Building an Array +//! +//! Most [`Array`] implementations can be constructed directly from iterators or [`Vec`] +//! +//! ``` +//! # use arrow_array::{Int32Array, ListArray, StringArray}; +//! # use arrow_array::types::Int32Type; +//! +//! Int32Array::from(vec![1, 2]); +//! Int32Array::from(vec![Some(1), None]); +//! Int32Array::from_iter([1, 2, 3, 4]); +//! 
Int32Array::from_iter([Some(1), Some(2), None, Some(4)]); +//! +//! StringArray::from(vec!["foo", "bar"]); +//! StringArray::from(vec![Some("foo"), None]); +//! StringArray::from_iter([Some("foo"), None]); +//! StringArray::from_iter_values(["foo", "bar"]); +//! +//! ListArray::from_iter_primitive::([ +//! Some(vec![Some(1), None, Some(3)]), +//! None, +//! Some(vec![]) +//! ]); +//! ``` +//! +//! Additionally [`ArrayBuilder`](builder::ArrayBuilder) implementations can be +//! used to construct arrays with a push-based interface +//! +//! ``` +//! # use arrow_array::Int16Array; +//! # +//! // Create a new builder with a capacity of 100 +//! let mut builder = Int16Array::builder(100); +//! +//! // Append a single primitive value +//! builder.append_value(1); +//! +//! // Append a null value +//! builder.append_null(); +//! +//! // Append a slice of primitive values +//! builder.append_slice(&[2, 3, 4]); +//! +//! // Build the array +//! let array = builder.finish(); +//! +//! assert_eq!( +//! 5, +//! array.len(), +//! "The array has 5 values, counting the null value" +//! ); +//! +//! assert_eq!(2, array.value(2), "Get the value with index 2"); +//! +//! assert_eq!( +//! &array.values()[3..5], +//! &[3, 4], +//! "Get slice of len 2 starting at idx 3" +//! ) +//! ``` +//! +//! # Zero-Copy Slicing +//! +//! Given an [`Array`] of arbitrary length, it is possible to create an owned slice of this +//! data. Internally this just increments some ref-counts, and so is incredibly cheap +//! +//! ```rust +//! # use std::sync::Arc; +//! # use arrow_array::{ArrayRef, Int32Array}; +//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef; +//! +//! // Slice with offset 1 and length 2 +//! let sliced = array.slice(1, 2); +//! let ints = sliced.as_any().downcast_ref::().unwrap(); +//! assert_eq!(ints.values(), &[2, 3]); +//! ``` +//! +//! # Internal Representation +//! +//! 
Internally, arrays are represented by one or several [`Buffer`], the number and meaning of +//! which depend on the array’s data type, as documented in the [Arrow specification]. +//! +//! For example, the type `Int16Array` represents an array of 16-bit integers and consists of: +//! +//! * An optional [`Bitmap`] identifying any null values +//! * A contiguous [`Buffer`] of 16-bit integers +//! +//! Similarly, the type `StringArray` represents an array of UTF-8 strings and consists of: +//! +//! * An optional [`Bitmap`] identifying any null values +//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8 sequences within the values buffer +//! * A values [`Buffer`] of UTF-8 encoded string data +//! +//! [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html +//! [`&dyn Array`]: Array +//! [`Bitmap`]: arrow_data::Bitmap +//! [`Buffer`]: arrow_buffer::Buffer +//! [`compute`]: https://docs.rs/arrow/latest/arrow/compute/index.html +//! [`json`]: https://docs.rs/arrow/latest/arrow/json/index.html +//! 
[`csv`]: https://docs.rs/arrow/latest/arrow/csv/index.html + +pub mod array; +pub use array::*; + +mod record_batch; +pub use record_batch::{RecordBatch, RecordBatchOptions}; + +pub mod builder; +pub mod cast; +pub mod decimal; +mod delta; +pub mod iterator; +mod raw_pointer; +pub mod temporal_conversions; +mod trusted_len; +pub mod types; + +#[cfg(test)] +mod tests { + use crate::builder::*; + + #[test] + fn test_buffer_builder_availability() { + let _builder = Int8BufferBuilder::new(10); + let _builder = Int16BufferBuilder::new(10); + let _builder = Int32BufferBuilder::new(10); + let _builder = Int64BufferBuilder::new(10); + let _builder = UInt16BufferBuilder::new(10); + let _builder = UInt32BufferBuilder::new(10); + let _builder = Float32BufferBuilder::new(10); + let _builder = Float64BufferBuilder::new(10); + let _builder = TimestampSecondBufferBuilder::new(10); + let _builder = TimestampMillisecondBufferBuilder::new(10); + let _builder = TimestampMicrosecondBufferBuilder::new(10); + let _builder = TimestampNanosecondBufferBuilder::new(10); + let _builder = Date32BufferBuilder::new(10); + let _builder = Date64BufferBuilder::new(10); + let _builder = Time32SecondBufferBuilder::new(10); + let _builder = Time32MillisecondBufferBuilder::new(10); + let _builder = Time64MicrosecondBufferBuilder::new(10); + let _builder = Time64NanosecondBufferBuilder::new(10); + let _builder = IntervalYearMonthBufferBuilder::new(10); + let _builder = IntervalDayTimeBufferBuilder::new(10); + let _builder = IntervalMonthDayNanoBufferBuilder::new(10); + let _builder = DurationSecondBufferBuilder::new(10); + let _builder = DurationMillisecondBufferBuilder::new(10); + let _builder = DurationMicrosecondBufferBuilder::new(10); + let _builder = DurationNanosecondBufferBuilder::new(10); + } +} diff --git a/arrow/src/array/raw_pointer.rs b/arrow-array/src/raw_pointer.rs similarity index 100% rename from arrow/src/array/raw_pointer.rs rename to arrow-array/src/raw_pointer.rs diff --git 
a/arrow/src/record_batch.rs b/arrow-array/src/record_batch.rs similarity index 91% rename from arrow/src/record_batch.rs rename to arrow-array/src/record_batch.rs index f71c67fe774..58462449ea3 100644 --- a/arrow/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -16,16 +16,14 @@ // under the License. //! A two-dimensional batch of column-oriented data with a defined -//! [schema](crate::datatypes::Schema). +//! [schema](arrow_schema::Schema). +use crate::{new_empty_array, Array, ArrayRef, StructArray}; +use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef}; use std::sync::Arc; -use crate::array::*; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; - /// A two-dimensional batch of column-oriented data with a defined -/// [schema](crate::datatypes::Schema). +/// [schema](arrow_schema::Schema). /// /// A `RecordBatch` is a two-dimensional dataset of a number of /// contiguous arrays, each the same length. @@ -34,8 +32,6 @@ use crate::error::{ArrowError, Result}; /// /// Record batches are a convenient unit of work for various /// serialization and computation functions, possibly incremental. -/// See also [CSV reader](crate::csv::Reader) and -/// [JSON reader](crate::json::Reader). 
#[derive(Clone, Debug, PartialEq)] pub struct RecordBatch { schema: SchemaRef, @@ -61,12 +57,10 @@ impl RecordBatch { /// # Example /// /// ``` - /// use std::sync::Arc; - /// use arrow::array::Int32Array; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{Int32Array, RecordBatch}; + /// # use arrow_schema::{DataType, Field, Schema}; /// - /// # fn main() -> arrow::error::Result<()> { /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false) @@ -75,11 +69,12 @@ impl RecordBatch { /// let batch = RecordBatch::try_new( /// Arc::new(schema), /// vec![Arc::new(id_array)] - /// )?; - /// # Ok(()) - /// # } + /// ).unwrap(); /// ``` - pub fn try_new(schema: SchemaRef, columns: Vec) -> Result { + pub fn try_new( + schema: SchemaRef, + columns: Vec, + ) -> Result { let options = RecordBatchOptions::new(); Self::try_new_impl(schema, columns, &options) } @@ -92,7 +87,7 @@ impl RecordBatch { schema: SchemaRef, columns: Vec, options: &RecordBatchOptions, - ) -> Result { + ) -> Result { Self::try_new_impl(schema, columns, options) } @@ -117,7 +112,7 @@ impl RecordBatch { schema: SchemaRef, columns: Vec, options: &RecordBatchOptions, - ) -> Result { + ) -> Result { // check that number of fields in schema match column length if schema.fields().len() != columns.len() { return Err(ArrowError::InvalidArgumentError(format!( @@ -191,13 +186,13 @@ impl RecordBatch { }) } - /// Returns the [`Schema`](crate::datatypes::Schema) of the record batch. + /// Returns the [`Schema`](arrow_schema::Schema) of the record batch. 
pub fn schema(&self) -> SchemaRef { self.schema.clone() } /// Projects the schema onto the specified columns - pub fn project(&self, indices: &[usize]) -> Result { + pub fn project(&self, indices: &[usize]) -> Result { let projected_schema = self.schema.project(indices)?; let batch_fields = indices .iter() @@ -210,7 +205,7 @@ impl RecordBatch { )) }) }) - .collect::>>()?; + .collect::, _>>()?; RecordBatch::try_new_with_options( SchemaRef::new(projected_schema), @@ -227,22 +222,18 @@ impl RecordBatch { /// # Example /// /// ``` - /// use std::sync::Arc; - /// use arrow::array::Int32Array; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{Int32Array, RecordBatch}; + /// # use arrow_schema::{DataType, Field, Schema}; /// - /// # fn main() -> arrow::error::Result<()> { /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false) /// ]); /// - /// let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)])?; + /// let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap(); /// /// assert_eq!(batch.num_columns(), 1); - /// # Ok(()) - /// # } /// ``` pub fn num_columns(&self) -> usize { self.columns.len() @@ -253,22 +244,18 @@ impl RecordBatch { /// # Example /// /// ``` - /// use std::sync::Arc; - /// use arrow::array::Int32Array; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{Int32Array, RecordBatch}; + /// # use arrow_schema::{DataType, Field, Schema}; /// - /// # fn main() -> arrow::error::Result<()> { /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false) /// ]); /// - /// let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)])?; + /// let 
batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap(); /// /// assert_eq!(batch.num_rows(), 5); - /// # Ok(()) - /// # } /// ``` pub fn num_rows(&self) -> usize { self.row_count @@ -322,10 +309,8 @@ impl RecordBatch { /// /// Example: /// ``` - /// use std::sync::Arc; - /// use arrow::array::{ArrayRef, Int32Array, StringArray}; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray}; /// /// let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2])); /// let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"])); @@ -335,7 +320,7 @@ impl RecordBatch { /// ("b", b), /// ]); /// ``` - pub fn try_from_iter(value: I) -> Result + pub fn try_from_iter(value: I) -> Result where I: IntoIterator, F: AsRef, @@ -359,10 +344,8 @@ impl RecordBatch { /// /// Example: /// ``` - /// use std::sync::Arc; - /// use arrow::array::{ArrayRef, Int32Array, StringArray}; - /// use arrow::datatypes::{Schema, Field, DataType}; - /// use arrow::record_batch::RecordBatch; + /// # use std::sync::Arc; + /// # use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray}; /// /// let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2])); /// let b: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), Some("b")])); @@ -374,7 +357,7 @@ impl RecordBatch { /// ("b", b, true), /// ]); /// ``` - pub fn try_from_iter_with_nullable(value: I) -> Result + pub fn try_from_iter_with_nullable(value: I) -> Result where I: IntoIterator, F: AsRef, @@ -394,12 +377,6 @@ impl RecordBatch { let schema = Arc::new(Schema::new(fields)); RecordBatch::try_new(schema, columns) } - - /// Concatenates `batches` together into a single record batch. 
- #[deprecated(note = "please use arrow::compute::concat_batches")] - pub fn concat(schema: &SchemaRef, batches: &[Self]) -> Result { - crate::compute::concat_batches(schema, batches) - } } /// Options that control the behaviour used when creating a [`RecordBatch`]. @@ -469,29 +446,14 @@ impl From for StructArray { } } -/// Trait for types that can read `RecordBatch`'s. -pub trait RecordBatchReader: Iterator> { - /// Returns the schema of this `RecordBatchReader`. - /// - /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this - /// reader should have the same schema as returned from this method. - fn schema(&self) -> SchemaRef; - - /// Reads the next `RecordBatch`. - #[deprecated( - since = "2.0.0", - note = "This method is deprecated in favour of `next` from the trait Iterator." - )] - fn next_batch(&mut self) -> Result> { - self.next().transpose() - } -} - #[cfg(test)] mod tests { use super::*; - - use crate::buffer::Buffer; + use crate::{ + BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray, + }; + use arrow_buffer::{Buffer, ToByteSlice}; + use arrow_data::ArrayDataBuilder; #[test] fn create_record_batch() { diff --git a/arrow/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs similarity index 71% rename from arrow/src/temporal_conversions.rs rename to arrow-array/src/temporal_conversions.rs index 14fa82f6e7d..4a371fc788e 100644 --- a/arrow/src/temporal_conversions.rs +++ b/arrow-array/src/temporal_conversions.rs @@ -17,21 +17,23 @@ //! Conversion methods for dates and times. 
-use chrono::{Duration, NaiveDateTime, NaiveTime}; +use crate::ArrowPrimitiveType; +use arrow_schema::{DataType, TimeUnit}; +use chrono::{Duration, NaiveDate, NaiveDateTime, NaiveTime}; /// Number of seconds in a day -pub(crate) const SECONDS_IN_DAY: i64 = 86_400; +pub const SECONDS_IN_DAY: i64 = 86_400; /// Number of milliseconds in a second -pub(crate) const MILLISECONDS: i64 = 1_000; +pub const MILLISECONDS: i64 = 1_000; /// Number of microseconds in a second -pub(crate) const MICROSECONDS: i64 = 1_000_000; +pub const MICROSECONDS: i64 = 1_000_000; /// Number of nanoseconds in a second -pub(crate) const NANOSECONDS: i64 = 1_000_000_000; +pub const NANOSECONDS: i64 = 1_000_000_000; /// Number of milliseconds in a day -pub(crate) const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS; +pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS; /// Number of days between 0001-01-01 and 1970-01-01 -pub(crate) const EPOCH_DAYS_FROM_CE: i32 = 719_163; +pub const EPOCH_DAYS_FROM_CE: i32 = 719_163; /// converts a `i32` representing a `date32` to [`NaiveDateTime`] #[inline] @@ -167,6 +169,66 @@ pub fn duration_ns_to_duration(v: i64) -> Duration { Duration::nanoseconds(v) } +/// Converts an [`ArrowPrimitiveType`] to [`NaiveDateTime`] +pub fn as_datetime(v: i64) -> Option { + match T::DATA_TYPE { + DataType::Date32 => Some(date32_to_datetime(v as i32)), + DataType::Date64 => Some(date64_to_datetime(v)), + DataType::Time32(_) | DataType::Time64(_) => None, + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => Some(timestamp_s_to_datetime(v)), + TimeUnit::Millisecond => Some(timestamp_ms_to_datetime(v)), + TimeUnit::Microsecond => Some(timestamp_us_to_datetime(v)), + TimeUnit::Nanosecond => Some(timestamp_ns_to_datetime(v)), + }, + // interval is not yet fully documented [ARROW-3097] + DataType::Interval(_) => None, + _ => None, + } +} + +/// Converts an [`ArrowPrimitiveType`] to [`NaiveDate`] +pub fn as_date(v: i64) -> Option { + 
as_datetime::(v).map(|datetime| datetime.date()) +} + +/// Converts an [`ArrowPrimitiveType`] to [`NaiveTime`] +pub fn as_time(v: i64) -> Option { + match T::DATA_TYPE { + DataType::Time32(unit) => { + // safe to immediately cast to u32 as `self.value(i)` is positive i32 + let v = v as u32; + match unit { + TimeUnit::Second => Some(time32s_to_time(v as i32)), + TimeUnit::Millisecond => Some(time32ms_to_time(v as i32)), + _ => None, + } + } + DataType::Time64(unit) => match unit { + TimeUnit::Microsecond => Some(time64us_to_time(v)), + TimeUnit::Nanosecond => Some(time64ns_to_time(v)), + _ => None, + }, + DataType::Timestamp(_, _) => as_datetime::(v).map(|datetime| datetime.time()), + DataType::Date32 | DataType::Date64 => Some(NaiveTime::from_hms(0, 0, 0)), + DataType::Interval(_) => None, + _ => None, + } +} + +/// Converts an [`ArrowPrimitiveType`] to [`Duration`] +pub fn as_duration(v: i64) -> Option { + match T::DATA_TYPE { + DataType::Duration(unit) => match unit { + TimeUnit::Second => Some(duration_s_to_duration(v)), + TimeUnit::Millisecond => Some(duration_ms_to_duration(v)), + TimeUnit::Microsecond => Some(duration_us_to_duration(v)), + TimeUnit::Nanosecond => Some(duration_ns_to_duration(v)), + }, + _ => None, + } +} + #[cfg(test)] mod tests { use crate::temporal_conversions::{ diff --git a/arrow/src/util/trusted_len.rs b/arrow-array/src/trusted_len.rs similarity index 96% rename from arrow/src/util/trusted_len.rs rename to arrow-array/src/trusted_len.rs index 84a66238b63..fdec18b7878 100644 --- a/arrow/src/util/trusted_len.rs +++ b/arrow-array/src/trusted_len.rs @@ -15,11 +15,7 @@ // specific language governing permissions and limitations // under the License. -use super::bit_util; -use crate::{ - buffer::{Buffer, MutableBuffer}, - datatypes::ArrowNativeType, -}; +use arrow_buffer::{bit_util, ArrowNativeType, Buffer, MutableBuffer}; /// Creates two [`Buffer`]s from an iterator of `Option`. 
/// The first buffer corresponds to a bitmap buffer, the second one diff --git a/arrow/src/datatypes/types.rs b/arrow-array/src/types.rs similarity index 99% rename from arrow/src/datatypes/types.rs rename to arrow-array/src/types.rs index 1b7d0675bb4..b7c1a6318f9 100644 --- a/arrow/src/datatypes/types.rs +++ b/arrow-array/src/types.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use super::{ArrowPrimitiveType, DataType, IntervalUnit, TimeUnit}; -use crate::datatypes::delta::shift_months; -use crate::datatypes::{ +use crate::array::ArrowPrimitiveType; +use crate::delta::shift_months; +use arrow_data::decimal::{ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; +use arrow_schema::{DataType, IntervalUnit, TimeUnit}; use chrono::{Duration, NaiveDate}; use half::f16; use std::ops::{Add, Sub}; diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 9605cdda720..60fe3c6ca9a 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -22,6 +22,9 @@ use std::hash::Hash; use crate::error::ArrowError; use crate::field::Field; +/// A reference-counted reference to a [`Schema`]. +pub type SchemaRef = std::sync::Arc; + /// Describes the meta-data of an ordered sequence of relative types. 
/// /// Note that this information is only part of the meta-data and not part of the physical diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index f29c4e31791..48b7f39547e 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -47,6 +47,7 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] arrow-buffer = { version = "23.0.0", path = "../arrow-buffer" } arrow-data = { version = "23.0.0", path = "../arrow-data" } arrow-schema = { version = "23.0.0", path = "../arrow-schema" } +arrow-array = { version = "23.0.0", path = "../arrow-array" } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } diff --git a/arrow/src/array/cast.rs b/arrow/src/array/cast.rs deleted file mode 100644 index 2c8366ff5f9..00000000000 --- a/arrow/src/array/cast.rs +++ /dev/null @@ -1,761 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Defines helper functions for force [`Array`] downcasts - -use crate::array::*; -use crate::datatypes::*; - -/// Downcast an [`Array`] to a [`PrimitiveArray`] based on its [`DataType`], accepts -/// a number of subsequent patterns to match the data type -/// -/// ``` -/// # use arrow::downcast_primitive_array; -/// # use arrow::array::Array; -/// # use arrow::datatypes::DataType; -/// # use arrow::array::as_string_array; -/// -/// fn print_primitive(array: &dyn Array) { -/// downcast_primitive_array!( -/// array => { -/// for v in array { -/// println!("{:?}", v); -/// } -/// } -/// DataType::Utf8 => { -/// for v in as_string_array(array) { -/// println!("{:?}", v); -/// } -/// } -/// t => println!("Unsupported datatype {}", t) -/// ) -/// } -/// ``` -/// -#[macro_export] -macro_rules! downcast_primitive_array { - ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => { - downcast_primitive_array!($values => {$e} $($p => $fallback)*) - }; - - ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => { - match $values.data_type() { - $crate::datatypes::DataType::Int8 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Int8Type, - >($values); - $e - } - $crate::datatypes::DataType::Int16 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Int16Type, - >($values); - $e - } - $crate::datatypes::DataType::Int32 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Int32Type, - >($values); - $e - } - $crate::datatypes::DataType::Int64 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Int64Type, - >($values); - $e - } - $crate::datatypes::DataType::UInt8 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::UInt8Type, - >($values); - $e - } - $crate::datatypes::DataType::UInt16 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::UInt16Type, - >($values); - $e - } - 
$crate::datatypes::DataType::UInt32 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::UInt32Type, - >($values); - $e - } - $crate::datatypes::DataType::UInt64 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::UInt64Type, - >($values); - $e - } - $crate::datatypes::DataType::Float16 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Float16Type, - >($values); - $e - } - $crate::datatypes::DataType::Float32 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Float32Type, - >($values); - $e - } - $crate::datatypes::DataType::Float64 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Float64Type, - >($values); - $e - } - $crate::datatypes::DataType::Date32 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Date32Type, - >($values); - $e - } - $crate::datatypes::DataType::Date64 => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Date64Type, - >($values); - $e - } - $crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Second) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Time32SecondType, - >($values); - $e - } - $crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Millisecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Time32MillisecondType, - >($values); - $e - } - $crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Microsecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Time64MicrosecondType, - >($values); - $e - } - $crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Nanosecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::Time64NanosecondType, - >($values); - $e - } - $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Second, _) => { - let $values = 
$crate::array::as_primitive_array::< - $crate::datatypes::TimestampSecondType, - >($values); - $e - } - $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Millisecond, _) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMillisecondType, - >($values); - $e - } - $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Microsecond, _) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMicrosecondType, - >($values); - $e - } - $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Nanosecond, _) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampNanosecondType, - >($values); - $e - } - $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::YearMonth) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalYearMonthType, - >($values); - $e - } - $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::DayTime) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalDayTimeType, - >($values); - $e - } - $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::MonthDayNano) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalMonthDayNanoType, - >($values); - $e - } - $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Second) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::DurationSecondType, - >($values); - $e - } - $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Millisecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMillisecondType, - >($values); - $e - } - $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Microsecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMicrosecondType, - >($values); - $e - } - 
$crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Nanosecond) => { - let $values = $crate::array::as_primitive_array::< - $crate::datatypes::DurationNanosecondType, - >($values); - $e - } - $($p => $fallback,)* - } - }; - - (($values1:ident, $values2:ident) => $e:block $($p:pat => $fallback:expr $(,)*)*) => { - match ($values1.data_type(), $values2.data_type()) { - ($crate::datatypes::DataType::Int8, $crate::datatypes::DataType::Int8) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Int8Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Int8Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Int16, $crate::datatypes::DataType::Int16) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Int16Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Int16Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Int32, $crate::datatypes::DataType::Int32) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Int32Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Int32Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Int64, $crate::datatypes::DataType::Int64) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Int64Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Int64Type, - >($values2); - $e - } - ($crate::datatypes::DataType::UInt8, $crate::datatypes::DataType::UInt8) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt8Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt8Type, - >($values2); - $e - } - ($crate::datatypes::DataType::UInt16, $crate::datatypes::DataType::UInt16) => { - let $values1 = $crate::array::as_primitive_array::< - 
$crate::datatypes::UInt16Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt16Type, - >($values2); - $e - } - ($crate::datatypes::DataType::UInt32, $crate::datatypes::DataType::UInt32) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt32Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt32Type, - >($values2); - $e - } - ($crate::datatypes::DataType::UInt64, $crate::datatypes::DataType::UInt64) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt64Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::UInt64Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Float32, $crate::datatypes::DataType::Float32) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Float32Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Float32Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Float64, $crate::datatypes::DataType::Float64) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Float64Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Float64Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Date32, $crate::datatypes::DataType::Date32) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Date32Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Date32Type, - >($values2); - $e - } - ($crate::datatypes::DataType::Date64, $crate::datatypes::DataType::Date64) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Date64Type, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Date64Type, - >($values2); - $e - } - 
($crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Second), $crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Second)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Time32SecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Time32SecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Millisecond), $crate::datatypes::DataType::Time32($crate::datatypes::TimeUnit::Millisecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Time32MillisecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Time32MillisecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Microsecond), $crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Microsecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Time64MicrosecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Time64MicrosecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Nanosecond), $crate::datatypes::DataType::Time64($crate::datatypes::TimeUnit::Nanosecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::Time64NanosecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::Time64NanosecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Second, _), $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Second, _)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampSecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampSecondType, - >($values2); - $e - } - 
($crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Millisecond, _), $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Millisecond, _)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMillisecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMillisecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Microsecond, _), $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Microsecond, _)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMicrosecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampMicrosecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Nanosecond, _), $crate::datatypes::DataType::Timestamp($crate::datatypes::TimeUnit::Nanosecond, _)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampNanosecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::TimestampNanosecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::YearMonth), $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::YearMonth)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalYearMonthType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalYearMonthType, - >($values2); - $e - } - ($crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::DayTime), $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::DayTime)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalDayTimeType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - 
$crate::datatypes::IntervalDayTimeType, - >($values2); - $e - } - ($crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::MonthDayNano), $crate::datatypes::DataType::Interval($crate::datatypes::IntervalUnit::MonthDayNano)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalMonthDayNanoType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::IntervalMonthDayNanoType, - >($values2); - $e - } - ($crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Second), $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Second)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationSecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationSecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Millisecond), $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Millisecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMillisecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMillisecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Microsecond), $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Microsecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMicrosecondType, - >($values1); - let $values2 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationMicrosecondType, - >($values2); - $e - } - ($crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Nanosecond), $crate::datatypes::DataType::Duration($crate::datatypes::TimeUnit::Nanosecond)) => { - let $values1 = $crate::array::as_primitive_array::< - $crate::datatypes::DurationNanosecondType, - >($values1); - let $values2 = 
$crate::array::as_primitive_array::< - $crate::datatypes::DurationNanosecondType, - >($values2); - $e - } - $($p => $fallback,)* - } - }; -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`], to -/// [`PrimitiveArray`], panic'ing on failure. -/// -/// # Example -/// -/// ``` -/// # use arrow::array::*; -/// # use arrow::datatypes::*; -/// # use std::sync::Arc; -/// let arr: ArrayRef = Arc::new(Int32Array::from(vec![Some(1)])); -/// -/// // Downcast an `ArrayRef` to Int32Array / PrimiveArray: -/// let primitive_array: &Int32Array = as_primitive_array(&arr); -/// -/// // Equivalently: -/// let primitive_array = as_primitive_array::(&arr); -/// -/// // This is the equivalent of: -/// let primitive_array = arr -/// .as_any() -/// .downcast_ref::() -/// .unwrap(); -/// ``` - -pub fn as_primitive_array(arr: &dyn Array) -> &PrimitiveArray -where - T: ArrowPrimitiveType, -{ - arr.as_any() - .downcast_ref::>() - .expect("Unable to downcast to primitive array") -} - -/// Downcast an [`Array`] to a [`DictionaryArray`] based on its [`DataType`], accepts -/// a number of subsequent patterns to match the data type -/// -/// ``` -/// # use arrow::downcast_dictionary_array; -/// # use arrow::array::{Array, StringArray}; -/// # use arrow::datatypes::DataType; -/// # use arrow::array::as_string_array; -/// -/// fn print_strings(array: &dyn Array) { -/// downcast_dictionary_array!( -/// array => match array.values().data_type() { -/// DataType::Utf8 => { -/// for v in array.downcast_dict::().unwrap() { -/// println!("{:?}", v); -/// } -/// } -/// t => println!("Unsupported dictionary value type {}", t), -/// }, -/// DataType::Utf8 => { -/// for v in as_string_array(array) { -/// println!("{:?}", v); -/// } -/// } -/// t => println!("Unsupported datatype {}", t) -/// ) -/// } -/// ``` -#[macro_export] -macro_rules! 
downcast_dictionary_array { - ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => { - downcast_dictionary_array!($values => {$e} $($p => $fallback)*) - }; - - ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => { - match $values.data_type() { - $crate::datatypes::DataType::Dictionary(k, _) => match k.as_ref() { - $crate::datatypes::DataType::Int8 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::Int8Type, - >($values); - $e - }, - $crate::datatypes::DataType::Int16 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::Int16Type, - >($values); - $e - }, - $crate::datatypes::DataType::Int32 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::Int32Type, - >($values); - $e - }, - $crate::datatypes::DataType::Int64 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::Int64Type, - >($values); - $e - }, - $crate::datatypes::DataType::UInt8 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::UInt8Type, - >($values); - $e - }, - $crate::datatypes::DataType::UInt16 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::UInt16Type, - >($values); - $e - }, - $crate::datatypes::DataType::UInt32 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::UInt32Type, - >($values); - $e - }, - $crate::datatypes::DataType::UInt64 => { - let $values = $crate::array::as_dictionary_array::< - $crate::datatypes::UInt64Type, - >($values); - $e - }, - k => unreachable!("unsupported dictionary key type: {}", k) - } - $($p => $fallback,)* - } - } -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`DictionaryArray`], panic'ing on failure. 
-/// -/// # Example -/// -/// ``` -/// # use arrow::array::*; -/// # use arrow::datatypes::*; -/// # use std::sync::Arc; -/// let arr: DictionaryArray = vec![Some("foo")].into_iter().collect(); -/// let arr: ArrayRef = std::sync::Arc::new(arr); -/// let dict_array: &DictionaryArray = as_dictionary_array::(&arr); -/// ``` -pub fn as_dictionary_array(arr: &dyn Array) -> &DictionaryArray -where - T: ArrowDictionaryKeyType, -{ - arr.as_any() - .downcast_ref::>() - .expect("Unable to downcast to dictionary array") -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`GenericListArray`], panic'ing on failure. -pub fn as_generic_list_array( - arr: &dyn Array, -) -> &GenericListArray { - arr.as_any() - .downcast_ref::>() - .expect("Unable to downcast to list array") -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`ListArray`], panic'ing on failure. -#[inline] -pub fn as_list_array(arr: &dyn Array) -> &ListArray { - as_generic_list_array::(arr) -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`LargeListArray`], panic'ing on failure. -#[inline] -pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray { - as_generic_list_array::(arr) -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`GenericBinaryArray`], panic'ing on failure. -#[inline] -pub fn as_generic_binary_array( - arr: &dyn Array, -) -> &GenericBinaryArray { - arr.as_any() - .downcast_ref::>() - .expect("Unable to downcast to binary array") -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`StringArray`], panic'ing on failure. 
-/// -/// # Example -/// -/// ``` -/// # use arrow::array::*; -/// # use std::sync::Arc; -/// let arr: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("foo")])); -/// let string_array = as_string_array(&arr); -/// ``` -pub fn as_string_array(arr: &dyn Array) -> &StringArray { - arr.as_any() - .downcast_ref::() - .expect("Unable to downcast to StringArray") -} - -/// Force downcast of an [`Array`], such as an [`ArrayRef`] to -/// [`BooleanArray`], panic'ing on failure. -/// -/// # Example -/// -/// ``` -/// # use arrow::array::*; -/// # use std::sync::Arc; -/// let arr: ArrayRef = Arc::new(BooleanArray::from_iter(vec![Some(true)])); -/// let boolean_array = as_boolean_array(&arr); -/// ``` -pub fn as_boolean_array(arr: &dyn Array) -> &BooleanArray { - arr.as_any() - .downcast_ref::() - .expect("Unable to downcast to BooleanArray") -} - -macro_rules! array_downcast_fn { - ($name: ident, $arrty: ty, $arrty_str:expr) => { - #[doc = "Force downcast of an [`Array`], such as an [`ArrayRef`] to "] - #[doc = $arrty_str] - pub fn $name(arr: &dyn Array) -> &$arrty { - arr.as_any().downcast_ref::<$arrty>().expect(concat!( - "Unable to downcast to typed array through ", - stringify!($name) - )) - } - }; - - // use recursive macro to generate dynamic doc string for a given array type - ($name: ident, $arrty: ty) => { - array_downcast_fn!( - $name, - $arrty, - concat!("[`", stringify!($arrty), "`], panic'ing on failure.") - ); - }; -} - -array_downcast_fn!(as_largestring_array, LargeStringArray); -array_downcast_fn!(as_null_array, NullArray); -array_downcast_fn!(as_struct_array, StructArray); -array_downcast_fn!(as_union_array, UnionArray); -array_downcast_fn!(as_map_array, MapArray); -array_downcast_fn!(as_decimal_array, Decimal128Array); - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use super::*; - - #[test] - fn test_as_decimal_array_ref() { - let array: Decimal128Array = vec![Some(123), None, Some(1111)] - .into_iter() - .collect::() - 
.with_precision_and_scale(10, 2) - .unwrap(); - assert!(!as_decimal_array(&array).is_empty()); - let result_decimal = as_decimal_array(&array); - assert_eq!(result_decimal, &array); - } - - #[test] - fn test_as_primitive_array_ref() { - let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect(); - assert!(!as_primitive_array::(&array).is_empty()); - - // should also work when wrapped in an Arc - let array: ArrayRef = Arc::new(array); - assert!(!as_primitive_array::(&array).is_empty()); - } - - #[test] - fn test_as_string_array_ref() { - let array: StringArray = vec!["foo", "bar"].into_iter().map(Some).collect(); - assert!(!as_string_array(&array).is_empty()); - - // should also work when wrapped in an Arc - let array: ArrayRef = Arc::new(array); - assert!(!as_string_array(&array).is_empty()) - } -} diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 32a1da17f84..c0972e2a969 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -157,487 +157,21 @@ //! [`Bitmap`]: crate::bitmap::Bitmap //! 
[`Buffer`]: crate::buffer::Buffer -#[allow(clippy::module_inception)] -mod array; -mod array_binary; -mod array_boolean; -mod array_decimal; -mod array_dictionary; -mod array_fixed_size_binary; -mod array_fixed_size_list; -mod array_list; -mod array_map; -mod array_primitive; -mod array_string; -mod array_struct; -mod array_union; -mod builder; -mod cast; #[cfg(feature = "ffi")] mod ffi; -mod iterator; -mod null; mod ord; -mod raw_pointer; - -use crate::datatypes::*; // --------------------- Array & ArrayData --------------------- - -pub use self::array::Array; -pub use self::array::ArrayAccessor; -pub use self::array::ArrayRef; +pub use arrow_array::array::*; +pub use arrow_array::builder::*; +pub use arrow_array::cast::*; +pub use arrow_array::iterator::*; pub use arrow_data::{ layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, DataTypeLayout, }; -pub use self::array_binary::BinaryArray; -pub use self::array_binary::LargeBinaryArray; -pub use self::array_boolean::BooleanArray; -pub use self::array_decimal::Decimal128Array; -pub use self::array_decimal::Decimal256Array; -pub use self::array_decimal::DecimalArray; -pub use self::array_fixed_size_binary::FixedSizeBinaryArray; -pub use self::array_fixed_size_list::FixedSizeListArray; - -pub use self::array_dictionary::{DictionaryArray, TypedDictionaryArray}; -pub use self::array_list::LargeListArray; -pub use self::array_list::ListArray; -pub use self::array_map::MapArray; -pub use self::array_primitive::PrimitiveArray; -pub use self::array_string::LargeStringArray; -pub use self::array_string::StringArray; -pub use self::array_struct::StructArray; -pub use self::array_union::UnionArray; -pub use self::null::NullArray; - -pub use self::array::make_array; -pub use self::array::new_empty_array; -pub use self::array::new_null_array; - -pub(crate) use self::array_primitive::{as_datetime, as_time}; - -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Int8Array; -/// let arr : Int8Array = 
[Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type Int8Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Int16Array; -/// let arr : Int16Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type Int16Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Int32Array; -/// let arr : Int32Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type Int32Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Int64Array; -/// let arr : Int64Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type Int64Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::UInt8Array; -/// let arr : UInt8Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type UInt8Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::UInt16Array; -/// let arr : UInt16Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type UInt16Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::UInt32Array; -/// let arr : UInt32Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type UInt32Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::UInt64Array; -/// let arr : UInt64Array = [Some(1), Some(2)].into_iter().collect(); -/// ``` -pub type UInt64Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Float16Array; -/// use half::f16; -/// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); -/// ``` -pub type Float16Array = PrimitiveArray; -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::Float32Array; -/// let arr : Float32Array = [Some(1.0), Some(2.0)].into_iter().collect(); -/// ``` -pub type Float32Array = PrimitiveArray; -/// -/// # Example: 
Using `collect` -/// ``` -/// # use arrow::array::Float64Array; -/// let arr : Float64Array = [Some(1.0), Some(2.0)].into_iter().collect(); -/// ``` -pub type Float64Array = PrimitiveArray; - -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, Int8DictionaryArray, Int8Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type Int8DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, Int16DictionaryArray, Int16Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int16Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type Int16DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. 
-/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, Int32DictionaryArray, Int32Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int32Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type Int32DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, Int64DictionaryArray, Int64Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int64Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type Int64DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, UInt8DictionaryArray, UInt8Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt8Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type UInt8DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. 
-/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, UInt16DictionaryArray, UInt16Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt16Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type UInt16DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, UInt32DictionaryArray, UInt32Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt32Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type UInt32DictionaryArray = DictionaryArray; -/// -/// A dictionary array where each element is a single value indexed by an integer key. -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow::array::{Array, UInt64DictionaryArray, UInt64Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt64Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` -pub type UInt64DictionaryArray = DictionaryArray; -/// -/// A primitive array where each element is of type [TimestampSecondType]. -/// See also [`Timestamp`](crate::datatypes::DataType::Timestamp). 
-/// -/// # Example: UTC timestamps post epoch -/// ``` -/// # use arrow::array::TimestampSecondArray; -/// use chrono::FixedOffset; -/// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00 -/// let arr = TimestampSecondArray::from_vec(vec![11111111], None); -/// // OR -/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(11111111)], None); -/// let utc_offset = FixedOffset::east(0); -/// -/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11") -/// ``` -/// -/// # Example: UTC timestamps pre epoch -/// ``` -/// # use arrow::array::TimestampSecondArray; -/// use chrono::FixedOffset; -/// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00 -/// let arr = TimestampSecondArray::from_vec(vec![-11111111], None); -/// // OR -/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(-11111111)], None); -/// let utc_offset = FixedOffset::east(0); -/// -/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49") -/// ``` -/// -/// # Example: With timezone specified -/// ``` -/// # use arrow::array::TimestampSecondArray; -/// use chrono::FixedOffset; -/// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00 -/// let arr = TimestampSecondArray::from_vec(vec![11111111], Some("+10:00".to_string())); -/// // OR -/// let arr = TimestampSecondArray::from_opt_vec(vec![Some(11111111)], Some("+10:00".to_string())); -/// let sydney_offset = FixedOffset::east(10 * 60 * 60); -/// -/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11") -/// ``` -/// -pub type TimestampSecondArray = PrimitiveArray; -/// A primitive array where each element is of type `TimestampMillisecondType.` -/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) -pub type TimestampMillisecondArray = PrimitiveArray; -/// A primitive array 
where each element is of type `TimestampMicrosecondType.` -/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) -pub type TimestampMicrosecondArray = PrimitiveArray; -/// A primitive array where each element is of type `TimestampNanosecondType.` -/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) -pub type TimestampNanosecondArray = PrimitiveArray; -pub type Date32Array = PrimitiveArray; -pub type Date64Array = PrimitiveArray; -pub type Time32SecondArray = PrimitiveArray; -pub type Time32MillisecondArray = PrimitiveArray; -pub type Time64MicrosecondArray = PrimitiveArray; -pub type Time64NanosecondArray = PrimitiveArray; -pub type IntervalYearMonthArray = PrimitiveArray; -pub type IntervalDayTimeArray = PrimitiveArray; -pub type IntervalMonthDayNanoArray = PrimitiveArray; -pub type DurationSecondArray = PrimitiveArray; -pub type DurationMillisecondArray = PrimitiveArray; -pub type DurationMicrosecondArray = PrimitiveArray; -pub type DurationNanosecondArray = PrimitiveArray; - -pub use self::array_binary::GenericBinaryArray; -pub use self::array_list::GenericListArray; -pub use self::array_list::OffsetSizeTrait; -pub use self::array_string::GenericStringArray; - -// --------------------- Array Builder --------------------- - -pub use self::builder::ArrayBuilder; -pub use self::builder::BinaryBuilder; -pub use self::builder::BooleanBufferBuilder; -pub use self::builder::BooleanBuilder; -pub use self::builder::BufferBuilder; -pub use self::builder::Decimal128Builder; -pub use self::builder::Decimal256Builder; - -#[deprecated(note = "Please use `Decimal128Builder` instead")] -pub type DecimalBuilder = Decimal128Builder; - -pub use self::builder::FixedSizeBinaryBuilder; -pub use self::builder::FixedSizeListBuilder; -pub use self::builder::GenericListBuilder; -pub use self::builder::GenericStringBuilder; -pub use self::builder::LargeBinaryBuilder; -pub use self::builder::LargeListBuilder; -pub use 
self::builder::LargeStringBuilder; -pub use self::builder::ListBuilder; -pub use self::builder::MapBuilder; -pub use self::builder::PrimitiveBuilder; -pub use self::builder::PrimitiveDictionaryBuilder; -pub use self::builder::StringBuilder; -pub use self::builder::StringDictionaryBuilder; -pub use self::builder::StructBuilder; -pub use self::builder::UnionBuilder; - -pub use self::builder::make_builder; - -pub type Int8BufferBuilder = BufferBuilder; -pub type Int16BufferBuilder = BufferBuilder; -pub type Int32BufferBuilder = BufferBuilder; -pub type Int64BufferBuilder = BufferBuilder; -pub type UInt8BufferBuilder = BufferBuilder; -pub type UInt16BufferBuilder = BufferBuilder; -pub type UInt32BufferBuilder = BufferBuilder; -pub type UInt64BufferBuilder = BufferBuilder; -pub type Float32BufferBuilder = BufferBuilder; -pub type Float64BufferBuilder = BufferBuilder; - -pub type TimestampSecondBufferBuilder = - BufferBuilder<::Native>; -pub type TimestampMillisecondBufferBuilder = - BufferBuilder<::Native>; -pub type TimestampMicrosecondBufferBuilder = - BufferBuilder<::Native>; -pub type TimestampNanosecondBufferBuilder = - BufferBuilder<::Native>; -pub type Date32BufferBuilder = BufferBuilder<::Native>; -pub type Date64BufferBuilder = BufferBuilder<::Native>; -pub type Time32SecondBufferBuilder = - BufferBuilder<::Native>; -pub type Time32MillisecondBufferBuilder = - BufferBuilder<::Native>; -pub type Time64MicrosecondBufferBuilder = - BufferBuilder<::Native>; -pub type Time64NanosecondBufferBuilder = - BufferBuilder<::Native>; -pub type IntervalYearMonthBufferBuilder = - BufferBuilder<::Native>; -pub type IntervalDayTimeBufferBuilder = - BufferBuilder<::Native>; -pub type IntervalMonthDayNanoBufferBuilder = - BufferBuilder<::Native>; -pub type DurationSecondBufferBuilder = - BufferBuilder<::Native>; -pub type DurationMillisecondBufferBuilder = - BufferBuilder<::Native>; -pub type DurationMicrosecondBufferBuilder = - BufferBuilder<::Native>; -pub type 
DurationNanosecondBufferBuilder = - BufferBuilder<::Native>; - -pub type Int8Builder = PrimitiveBuilder; -pub type Int16Builder = PrimitiveBuilder; -pub type Int32Builder = PrimitiveBuilder; -pub type Int64Builder = PrimitiveBuilder; -pub type UInt8Builder = PrimitiveBuilder; -pub type UInt16Builder = PrimitiveBuilder; -pub type UInt32Builder = PrimitiveBuilder; -pub type UInt64Builder = PrimitiveBuilder; -pub type Float32Builder = PrimitiveBuilder; -pub type Float64Builder = PrimitiveBuilder; - -pub type TimestampSecondBuilder = PrimitiveBuilder; -pub type TimestampMillisecondBuilder = PrimitiveBuilder; -pub type TimestampMicrosecondBuilder = PrimitiveBuilder; -pub type TimestampNanosecondBuilder = PrimitiveBuilder; -pub type Date32Builder = PrimitiveBuilder; -pub type Date64Builder = PrimitiveBuilder; -pub type Time32SecondBuilder = PrimitiveBuilder; -pub type Time32MillisecondBuilder = PrimitiveBuilder; -pub type Time64MicrosecondBuilder = PrimitiveBuilder; -pub type Time64NanosecondBuilder = PrimitiveBuilder; -pub type IntervalYearMonthBuilder = PrimitiveBuilder; -pub type IntervalDayTimeBuilder = PrimitiveBuilder; -pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder; -pub type DurationSecondBuilder = PrimitiveBuilder; -pub type DurationMillisecondBuilder = PrimitiveBuilder; -pub type DurationMicrosecondBuilder = PrimitiveBuilder; -pub type DurationNanosecondBuilder = PrimitiveBuilder; - pub use arrow_data::transform::{Capacities, MutableArrayData}; -// --------------------- Array Iterator --------------------- - -pub use self::iterator::*; - // --------------------- Array's values comparison --------------------- pub use self::ord::{build_compare, DynComparator}; - -// --------------------- Array downcast helper functions --------------------- - -pub use self::cast::{ - as_boolean_array, as_decimal_array, as_dictionary_array, as_generic_binary_array, - as_generic_list_array, as_large_list_array, as_largestring_array, as_list_array, - as_map_array, 
as_null_array, as_primitive_array, as_string_array, as_struct_array, - as_union_array, -}; - -// ------------------------------ C Data Interface --------------------------- - -#[cfg(feature = "ffi")] -pub use self::ffi::{export_array_into_raw, make_array_from_raw}; - -#[cfg(test)] -mod tests { - use crate::array::*; - - #[test] - fn test_buffer_builder_availability() { - let _builder = Int8BufferBuilder::new(10); - let _builder = Int16BufferBuilder::new(10); - let _builder = Int32BufferBuilder::new(10); - let _builder = Int64BufferBuilder::new(10); - let _builder = UInt16BufferBuilder::new(10); - let _builder = UInt32BufferBuilder::new(10); - let _builder = Float32BufferBuilder::new(10); - let _builder = Float64BufferBuilder::new(10); - let _builder = TimestampSecondBufferBuilder::new(10); - let _builder = TimestampMillisecondBufferBuilder::new(10); - let _builder = TimestampMicrosecondBufferBuilder::new(10); - let _builder = TimestampNanosecondBufferBuilder::new(10); - let _builder = Date32BufferBuilder::new(10); - let _builder = Date64BufferBuilder::new(10); - let _builder = Time32SecondBufferBuilder::new(10); - let _builder = Time32MillisecondBufferBuilder::new(10); - let _builder = Time64MicrosecondBufferBuilder::new(10); - let _builder = Time64NanosecondBufferBuilder::new(10); - let _builder = IntervalYearMonthBufferBuilder::new(10); - let _builder = IntervalDayTimeBufferBuilder::new(10); - let _builder = IntervalMonthDayNanoBufferBuilder::new(10); - let _builder = DurationSecondBufferBuilder::new(10); - let _builder = DurationMillisecondBufferBuilder::new(10); - let _builder = DurationMicrosecondBufferBuilder::new(10); - let _builder = DurationNanosecondBufferBuilder::new(10); - } -} diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 791363574c5..eab3dafda13 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -42,7 +42,6 @@ use std::ops::{Div, Mul}; use std::str; use std::sync::Arc; 
-use crate::array::as_datetime; use crate::buffer::MutableBuffer; use crate::compute::divide_scalar; use crate::compute::kernels::arithmetic::{divide, multiply}; @@ -54,8 +53,8 @@ use crate::compute::{try_unary, using_chrono_tz_and_utc_naive_date_time}; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::temporal_conversions::{ - EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS, - SECONDS_IN_DAY, + as_datetime, EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY, + NANOSECONDS, SECONDS_IN_DAY, }; use crate::{array::*, compute::take}; use crate::{buffer::Buffer, util::serialization::lexical_to_string}; diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index 52664a17544..291324e65a8 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -675,13 +675,7 @@ where ) }; - unsafe { - DictionaryArray::::try_new_unchecked( - filtered_keys, - array.values().clone(), - data, - ) - } + DictionaryArray::from(data) } #[cfg(test)] diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index f46cf7f5ab5..e61fec999ad 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -19,10 +19,10 @@ use chrono::{Datelike, Timelike}; -use crate::array::as_datetime; use crate::array::*; use crate::datatypes::*; use crate::error::{ArrowError, Result}; +use arrow_array::temporal_conversions::{as_datetime, as_time}; use chrono::format::strftime::StrftimeItems; use chrono::format::{parse, Parsed}; diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 3ec605dd048..a8c71a8e019 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -676,10 +676,9 @@ fn parse( RecordBatch::try_new_with_options( projected_schema, arr, - &RecordBatchOptions { - match_field_names: true, - row_count: Some(rows.len()), - }, + &RecordBatchOptions::new() + 
.with_match_field_names(true) + .with_row_count(Some(rows.len())), ) }) } diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index e7d9bfd5a4f..01462aeca96 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -22,23 +22,18 @@ //! * [`Field`](crate::datatypes::Field) to describe one field within a schema. //! * [`DataType`](crate::datatypes::DataType) to describe the type of a field. -use std::sync::Arc; - mod native; pub use native::*; mod numeric; pub use numeric::*; -mod types; -pub use types::*; -mod delta; +pub use arrow_array::types::*; pub use arrow_data::decimal::*; -pub use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionMode}; +pub use arrow_schema::{ + DataType, Field, IntervalUnit, Schema, SchemaRef, TimeUnit, UnionMode, +}; #[cfg(feature = "ffi")] mod ffi; #[cfg(feature = "ffi")] pub use ffi::*; - -/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema). -pub type SchemaRef = Arc; diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index dec0cc4b53b..6ab82688e52 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -15,33 +15,12 @@ // specific language governing permissions and limitations // under the License. -use super::DataType; use crate::error::{ArrowError, Result}; pub use arrow_buffer::{ArrowNativeType, ToByteSlice}; use half::f16; use num::Zero; -/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the -/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`]. -pub trait ArrowPrimitiveType: 'static { - /// Corresponding Rust native type for the primitive type. - type Native: ArrowNativeType; - - /// the corresponding Arrow data type of this primitive type. - const DATA_TYPE: DataType; - - /// Returns the byte width of this primitive type. 
- fn get_byte_width() -> usize { - std::mem::size_of::() - } - - /// Returns a default value of this primitive type. - /// - /// This is useful for aggregate array ops like `sum()`, `mean()`. - fn default_value() -> Self::Native { - Default::default() - } -} +pub use arrow_array::ArrowPrimitiveType; pub(crate) mod native_op { use super::ArrowNativeType; diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs index c32e5ca1848..d1589402480 100644 --- a/arrow/src/json/reader.rs +++ b/arrow/src/json/reader.rs @@ -719,10 +719,9 @@ impl Decoder { RecordBatch::try_new_with_options( projected_schema, arr, - &RecordBatchOptions { - match_field_names: true, - row_count: Some(rows.len()), - }, + &RecordBatchOptions::new() + .with_match_field_names(true) + .with_row_count(Some(rows.len())), ) .map(Some) }) diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 5cc264b1392..606bffe9327 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -248,6 +248,8 @@ #![deny(clippy::redundant_clone)] #![warn(missing_debug_implementations)] +pub use arrow_array::{downcast_dictionary_array, downcast_primitive_array}; + pub use arrow_buffer::{alloc, buffer}; pub mod bitmap { @@ -270,8 +272,32 @@ pub mod ipc; pub mod json; #[cfg(feature = "pyarrow")] pub mod pyarrow; -pub mod record_batch; + +pub mod record_batch { + pub use arrow_array::{RecordBatch, RecordBatchOptions}; + use arrow_schema::{ArrowError, SchemaRef}; + + /// Trait for types that can read `RecordBatch`'s. + pub trait RecordBatchReader: + Iterator> + { + /// Returns the schema of this `RecordBatchReader`. + /// + /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this + /// reader should have the same schema as returned from this method. + fn schema(&self) -> SchemaRef; + + /// Reads the next `RecordBatch`. + #[deprecated( + since = "2.0.0", + note = "This method is deprecated in favour of `next` from the trait Iterator." 
+ )] + fn next_batch(&mut self) -> Result, ArrowError> { + self.next().transpose() + } + } +} pub mod row; -pub mod temporal_conversions; +pub use arrow_array::temporal_conversions; pub mod tensor; pub mod util; diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index 310ffb8ee7a..a20657b5822 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -32,8 +32,5 @@ pub mod string_writer; #[cfg(any(test, feature = "test_utils"))] pub mod test_util; -mod trusted_len; -pub(crate) use trusted_len::trusted_len_unzip; - -pub mod decimal; +pub use arrow_array::decimal; pub(crate) mod reader_parser; From 09257fa348894e3327449934f421784f0fb483e2 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 23 Sep 2022 10:10:22 +0100 Subject: [PATCH 2/6] Fix ffi compilation --- arrow/src/array/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index c0972e2a969..201ee4eb9b7 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -172,6 +172,9 @@ pub use arrow_data::{ pub use arrow_data::transform::{Capacities, MutableArrayData}; +#[cfg(feature = "ffi")] +pub use self::ffi::{export_array_into_raw, make_array_from_raw}; + // --------------------- Array's values comparison --------------------- pub use self::ord::{build_compare, DynComparator}; From 626ea3789c42bdd4c96d597b67a15cd4b1158dcc Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 23 Sep 2022 10:25:12 +0100 Subject: [PATCH 3/6] Fix data_gen --- arrow/src/util/data_gen.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index 4d974409a0e..5dda410f008 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -47,10 +47,7 @@ pub fn create_random_batch( RecordBatch::try_new_with_options( schema, columns, - &RecordBatchOptions { - match_field_names: false, - row_count: None, - }, + 
&RecordBatchOptions::new().with_match_field_names(false), ) } From 65031f4efcc017bd093a3bdbb77bce1a4900b154 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 23 Sep 2022 10:36:44 +0100 Subject: [PATCH 4/6] Fix doc --- arrow-array/src/array/string_array.rs | 2 +- arrow-array/src/raw_pointer.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 6e34f125bb4..22ad81eaa3f 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -129,7 +129,7 @@ impl GenericStringArray { /// Convert a list array to a string array. /// /// Note: this performs potentially expensive UTF-8 validation, consider using - /// [`StringBuilder`][crate::array::StringBuilder] to avoid this + /// [`StringBuilder`][crate::builder::StringBuilder] to avoid this /// /// # Panics /// diff --git a/arrow-array/src/raw_pointer.rs b/arrow-array/src/raw_pointer.rs index 1016b808bc5..3e4233ea1b2 100644 --- a/arrow-array/src/raw_pointer.rs +++ b/arrow-array/src/raw_pointer.rs @@ -18,8 +18,8 @@ use std::ptr::NonNull; /// This struct is highly `unsafe` and offers the possibility to -/// self-reference a [crate::buffer::Buffer] from -/// [crate::array::ArrayData], as a pointer to the beginning of its +/// self-reference a [arrow_buffer::Buffer] from +/// [arrow_data::ArrayData], as a pointer to the beginning of its /// contents. 
pub(super) struct RawPtrBox { ptr: NonNull, From dbc076ccf54a9ee0629c29051a977e75129f6e24 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 23 Sep 2022 10:50:07 +0100 Subject: [PATCH 5/6] Doc tweaks --- arrow-array/src/array/mod.rs | 2 + arrow-array/src/builder/mod.rs | 5 +- arrow-array/src/cast.rs | 2 +- arrow-array/src/decimal.rs | 2 +- arrow-array/src/iterator.rs | 2 + arrow-array/src/lib.rs | 4 +- arrow-array/src/types.rs | 2 + arrow/src/array/mod.rs | 142 +-------------------------------- 8 files changed, 12 insertions(+), 149 deletions(-) diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index d9d2807e36c..e2ea6154912 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! The concrete array definitions + mod binary_array; use crate::types::*; diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 41583e1219d..cd4a82890a2 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -15,10 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Defines a [`BufferBuilder`](crate::builder::BufferBuilder) capable -//! of creating a [`Buffer`](arrow_buffer::Buffer) which can be used -//! as an internal buffer in an [`ArrayData`](arrow_data::ArrayData) -//! object. +//! Defines builders for the various array types mod boolean_buffer_builder; pub use boolean_buffer_builder::*; diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index 0556d30deac..653836b8d4e 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Defines helper functions for force [`Array`] downcasts +//! 
Defines helper functions for downcasting [`dyn Array`](Array) to concrete types use crate::array::*; use crate::types::*; diff --git a/arrow-array/src/decimal.rs b/arrow-array/src/decimal.rs index 605659290c7..323281d9233 100644 --- a/arrow-array/src/decimal.rs +++ b/arrow-array/src/decimal.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Decimal related utils +//! Decimal related utilities, types and functions use crate::types::{Decimal128Type, Decimal256Type, DecimalType}; use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs index 8f8e27998b0..25727e0d75f 100644 --- a/arrow-array/src/iterator.rs +++ b/arrow-array/src/iterator.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Idiomatic iterators for [`Array`](crate::Array) + use crate::array::{ ArrayAccessor, BooleanArray, DecimalArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray, GenericStringArray, PrimitiveArray, diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs index bd7549a193b..16e46f68ba0 100644 --- a/arrow-array/src/lib.rs +++ b/arrow-array/src/lib.rs @@ -139,12 +139,12 @@ //! Internally, arrays are represented by one or several [`Buffer`], the number and meaning of //! which depend on the array’s data type, as documented in the [Arrow specification]. //! -//! For example, the type `Int16Array` represents an array of 16-bit integers and consists of: +//! For example, the type [`Int16Array`] represents an array of 16-bit integers and consists of: //! //! * An optional [`Bitmap`] identifying any null values //! * A contiguous [`Buffer`] of 16-bit integers //! -//! Similarly, the type `StringArray` represents an array of UTF-8 strings and consists of: +//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings and consists of: //! //! 
* An optional [`Bitmap`] identifying any null values //! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8 sequences within the values buffer diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index b7c1a6318f9..581fdc767c2 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Zero-sized types used to parameterize generic array implementations + use crate::array::ArrowPrimitiveType; use crate::delta::shift_months; use arrow_data::decimal::{ diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 201ee4eb9b7..10009f5abde 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -15,147 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! The central type in Apache Arrow are arrays, which are a known-length sequence of values -//! all having the same type. This module provides concrete implementations of each type, as -//! well as an [`Array`] trait that can be used for type-erasure. -//! -//! # Downcasting an Array -//! -//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or [`ArrayRef`]. -//! For example, [`RecordBatch`](`crate::record_batch::RecordBatch`) stores columns as [`ArrayRef`]. -//! -//! Whilst these arrays can be passed directly to the -//! [`compute`](crate::compute), [`csv`](crate::csv), -//! [`json`](crate::json), etc... APIs, it is often the case that you -//! wish to interact with the data directly. This requires downcasting -//! to the concrete type of the array: -//! -//! ``` -//! # use arrow::array::{Array, Float32Array, Int32Array}; -//! # -//! fn sum_int32(array: &dyn Array) -> i32 { -//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap(); -//! integers.iter().map(|val| val.unwrap_or_default()).sum() -//! } -//! -//! // Note: the values for positions corresponding to nulls will be arbitrary -//! 
fn as_f32_slice(array: &dyn Array) -> &[f32] { -//! array.as_any().downcast_ref::<Float32Array>().unwrap().values() -//! } -//! ``` -//! -//! Additionally, there are convenient functions to do this casting -//! such as [`as_primitive_array`] and [`as_string_array`]: -//! -//! ``` -//! # use arrow::array::*; -//! # use arrow::datatypes::*; -//! # -//! fn as_f32_slice(array: &dyn Array) -> &[f32] { -//! // use as_primtive_array -//! as_primitive_array::<Float32Type>(array).values() -//! } -//! ``` - -//! # Building an Array -//! -//! Most [`Array`] implementations can be constructed directly from iterators or [`Vec`] -//! -//! ``` -//! # use arrow::array::Int32Array; -//! # use arrow::array::StringArray; -//! # use arrow::array::ListArray; -//! # use arrow::datatypes::Int32Type; -//! # -//! Int32Array::from(vec![1, 2]); -//! Int32Array::from(vec![Some(1), None]); -//! Int32Array::from_iter([1, 2, 3, 4]); -//! Int32Array::from_iter([Some(1), Some(2), None, Some(4)]); -//! -//! StringArray::from(vec!["foo", "bar"]); -//! StringArray::from(vec![Some("foo"), None]); -//! StringArray::from_iter([Some("foo"), None]); -//! StringArray::from_iter_values(["foo", "bar"]); -//! -//! ListArray::from_iter_primitive::<Int32Type, _, _>([ -//! Some(vec![Some(1), None, Some(3)]), -//! None, -//! Some(vec![]) -//! ]); -//! ``` -//! -//! Additionally [`ArrayBuilder`](crate::array::ArrayBuilder) implementations can be -//! used to construct arrays with a push-based interface -//! -//! ``` -//! # use arrow::array::Int16Array; -//! # -//! // Create a new builder with a capacity of 100 -//! let mut builder = Int16Array::builder(100); -//! -//! // Append a single primitive value -//! builder.append_value(1); -//! -//! // Append a null value -//! builder.append_null(); -//! -//! // Append a slice of primitive values -//! builder.append_slice(&[2, 3, 4]); -//! -//! // Build the array -//! let array = builder.finish(); -//! -//! assert_eq!( -//! 5, -//! array.len(), -//! "The array has 5 values, counting the null value" -//! ); -//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2"); -//! -//! assert_eq!( -//! &array.values()[3..5], -//! &[3, 4], -//! "Get slice of len 2 starting at idx 3" -//! ) -//! ``` -//! -//! # Zero-Copy Slicing -//! -//! Given an [`Array`] of arbitrary length, it is possible to create an owned slice of this -//! data. Internally this just increments some ref-counts, and so is incredibly cheap -//! -//! ```rust -//! # use std::sync::Arc; -//! # use arrow::array::{Array, Int32Array, ArrayRef}; -//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef; -//! -//! // Slice with offset 1 and length 2 -//! let sliced = array.slice(1, 2); -//! let ints = sliced.as_any().downcast_ref::<Int32Array>().unwrap(); -//! assert_eq!(ints.values(), &[2, 3]); -//! ``` -//! -//! # Internal Representation -//! -//! Internally, arrays are represented by one or several [`Buffer`], the number and meaning of -//! which depend on the array’s data type, as documented in the [Arrow specification]. -//! -//! For example, the type `Int16Array` represents an array of 16-bit integers and consists of: -//! -//! * An optional [`Bitmap`] identifying any null values -//! * A contiguous [`Buffer`] of 16-bit integers -//! -//! Similarly, the type `StringArray` represents an array of UTF-8 strings and consists of: -//! -//! * An optional [`Bitmap`] identifying any null values -//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8 sequences within the values buffer -//! * A values [`Buffer`] of UTF-8 encoded string data -//! -//! [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html -//! [`&dyn Array`]: Array -//! [`Bitmap`]: crate::bitmap::Bitmap -//! [`Buffer`]: crate::buffer::Buffer +//!
Re-exports APIs from [arrow_array] #[cfg(feature = "ffi")] mod ffi; From 988662196919a302810f171ef082f905541e7fa5 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 23 Sep 2022 17:08:46 +0100 Subject: [PATCH 6/6] Fix pyarrow --- arrow-pyarrow-integration-testing/src/lib.rs | 6 +++--- arrow/src/pyarrow.rs | 13 ------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index edfe2c680da..2e74f0cf66b 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use pyo3::prelude::*; use pyo3::wrap_pyfunction; -use arrow::array::{ArrayData, ArrayRef, Int64Array, make_array}; +use arrow::array::{Array, ArrayData, ArrayRef, Int64Array, make_array}; use arrow::compute::kernels; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::ArrowError; @@ -51,7 +51,7 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> { let array = kernels::arithmetic::add(array, array).map_err(to_py_err)?; // export - array.to_pyarrow(py) + array.data().to_pyarrow(py) } /// calls a lambda function that receives and returns an array @@ -63,7 +63,7 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> { let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef; // to py - let pyarray = array.to_pyarrow(py)?; + let pyarray = array.data().to_pyarrow(py)?; let pyarray = lambda.call1((pyarray,))?; let array = make_array(ArrayData::from_pyarrow(pyarray)?); diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index a775b2ce8bc..d8d5eee532e 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -166,19 +166,6 @@ impl<T: PyArrowConvert> PyArrowConvert for Vec<T> { } } -impl<T> PyArrowConvert for T -where - T: Array + From<ArrayData>, -{ - fn from_pyarrow(value: &PyAny) -> PyResult<Self> { - Ok(ArrayData::from_pyarrow(value)?.into()) - } - - fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> { -
self.data().to_pyarrow(py) - } -} - impl PyArrowConvert for RecordBatch { fn from_pyarrow(value: &PyAny) -> PyResult<Self> { // TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches