diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs index 566f3742e93..dcb6a1be724 100644 --- a/arrow-array/src/arithmetic.rs +++ b/arrow-array/src/arithmetic.rs @@ -45,60 +45,82 @@ pub trait ArrowNativeTypeOp: ArrowNativeType { /// The multiplicative identity const ONE: Self; + /// Checked addition operation fn add_checked(self, rhs: Self) -> Result; + /// Wrapping addition operation fn add_wrapping(self, rhs: Self) -> Self; + /// Checked subtraction operation fn sub_checked(self, rhs: Self) -> Result; + /// Wrapping subtraction operation fn sub_wrapping(self, rhs: Self) -> Self; + /// Checked multiplication operation fn mul_checked(self, rhs: Self) -> Result; + /// Wrapping multiplication operation fn mul_wrapping(self, rhs: Self) -> Self; + /// Checked division operation fn div_checked(self, rhs: Self) -> Result; + /// Wrapping division operation fn div_wrapping(self, rhs: Self) -> Self; + /// Checked remainder operation fn mod_checked(self, rhs: Self) -> Result; + /// Wrapping remainder operation fn mod_wrapping(self, rhs: Self) -> Self; + /// Checked negation operation fn neg_checked(self) -> Result; + /// Wrapping negation operation fn neg_wrapping(self) -> Self; + /// Checked exponentiation operation fn pow_checked(self, exp: u32) -> Result; + /// Wrapping exponentiation operation fn pow_wrapping(self, exp: u32) -> Self; + /// Returns true if zero else false fn is_zero(self) -> bool; + /// Compare operation fn compare(self, rhs: Self) -> Ordering; + /// Equality operation fn is_eq(self, rhs: Self) -> bool; + /// Not equal operation #[inline] fn is_ne(self, rhs: Self) -> bool { !self.is_eq(rhs) } + /// Less than operation #[inline] fn is_lt(self, rhs: Self) -> bool { self.compare(rhs).is_lt() } + /// Less than equals operation #[inline] fn is_le(self, rhs: Self) -> bool { self.compare(rhs).is_le() } + /// Greater than operation #[inline] fn is_gt(self, rhs: Self) -> bool { self.compare(rhs).is_gt() } + /// Greater than equals operation 
#[inline] fn is_ge(self, rhs: Self) -> bool { self.compare(rhs).is_ge() diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 31dde3a3dda..83af9760da7 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -91,7 +91,7 @@ impl BooleanArray { self.data.is_empty() } - // Returns a new boolean array builder + /// Returns a new boolean array builder pub fn builder(capacity: usize) -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) } diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 54699749f2f..204a36c3233 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -29,7 +29,9 @@ use std::any::Any; /// trait declaring an offset size, relevant for i32 vs i64 array types. pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer { + /// True for 64 bit offset size and false for 32 bit offset size const IS_LARGE: bool; + /// Prefix for the offset size const PREFIX: &'static str; } diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 307753a7117..5fc44d8965e 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -382,6 +382,7 @@ impl<'a, T: Array> Array for &'a T { /// The value at null indexes is unspecified, and implementations must not rely on a specific /// value such as [`Default::default`] being returned, however, it must not be undefined pub trait ArrayAccessor: Array { + /// The Arrow type of the element being accessed. 
type Item: Send + Sync; /// Returns the element at index `i` diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index bd68b9698ce..42d183238ea 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -165,21 +165,48 @@ pub type TimestampMicrosecondArray = PrimitiveArray; /// A primitive array where each element is of type `TimestampNanosecondType.` /// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray) pub type TimestampNanosecondArray = PrimitiveArray; + +// TODO: give examples for the below types + +/// A primitive array where each element is of 32-bit date type. pub type Date32Array = PrimitiveArray; +/// A primitive array where each element is of 64-bit date type. pub type Date64Array = PrimitiveArray; + +/// An array where each element is of 32-bit type representing time elapsed in seconds +/// since midnight. pub type Time32SecondArray = PrimitiveArray; +/// An array where each element is of 32-bit type representing time elapsed in milliseconds +/// since midnight. pub type Time32MillisecondArray = PrimitiveArray; +/// An array where each element is of 64-bit type representing time elapsed in microseconds +/// since midnight. pub type Time64MicrosecondArray = PrimitiveArray; +/// An array where each element is of 64-bit type representing time elapsed in nanoseconds +/// since midnight. pub type Time64NanosecondArray = PrimitiveArray; + +/// An array where each element is a “calendar” interval in months. pub type IntervalYearMonthArray = PrimitiveArray; +/// An array where each element is a “calendar” interval days and milliseconds. pub type IntervalDayTimeArray = PrimitiveArray; +/// An array where each element is a “calendar” interval in months, days, and nanoseconds. pub type IntervalMonthDayNanoArray = PrimitiveArray; + +/// An array where each element is an elapsed time type in seconds. 
pub type DurationSecondArray = PrimitiveArray; +/// An array where each element is an elapsed time type in milliseconds. pub type DurationMillisecondArray = PrimitiveArray; +/// An array where each element is an elapsed time type in microseconds. pub type DurationMicrosecondArray = PrimitiveArray; +/// An array where each element is an elapsed time type in nanoseconds. pub type DurationNanosecondArray = PrimitiveArray; +/// An array where each element is a 128-bits decimal with precision in [1, 38] and +/// scale in [-38, 38]. pub type Decimal128Array = PrimitiveArray; +/// An array where each element is a 256-bits decimal with precision in [1, 76] and +/// scale in [-76, 76]. pub type Decimal256Array = PrimitiveArray; /// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the @@ -256,7 +283,7 @@ impl PrimitiveArray { } } - // Returns a new primitive array builder + /// Returns a new primitive array builder pub fn builder(capacity: usize) -> PrimitiveBuilder { PrimitiveBuilder::::with_capacity(capacity) } @@ -749,6 +776,7 @@ impl<'a, T: ArrowPrimitiveType> PrimitiveArray { /// the type can be collected to `PrimitiveArray`. 
#[derive(Debug)] pub struct NativeAdapter { + /// Corresponding Rust native type if available pub native: Option, } diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs index 2ab01ccfe40..4f8638ee789 100644 --- a/arrow-array/src/builder/boolean_buffer_builder.rs +++ b/arrow-array/src/builder/boolean_buffer_builder.rs @@ -19,6 +19,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_data::bit_mask; use std::ops::Range; +/// A builder for creating a boolean [`Buffer`] #[derive(Debug)] pub struct BooleanBufferBuilder { buffer: MutableBuffer, @@ -26,6 +27,7 @@ pub struct BooleanBufferBuilder { } impl BooleanBufferBuilder { + /// Creates a new `BooleanBufferBuilder` #[inline] pub fn new(capacity: usize) -> Self { let byte_capacity = bit_util::ceil(capacity, 8); @@ -33,16 +35,19 @@ impl BooleanBufferBuilder { Self { buffer, len: 0 } } + /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len` pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self { assert!(len <= buffer.len() * 8); Self { buffer, len } } + /// Returns the length of the buffer #[inline] pub fn len(&self) -> usize { self.len } + /// Sets a bit in the buffer at `index` #[inline] pub fn set_bit(&mut self, index: usize, v: bool) { if v { @@ -52,21 +57,25 @@ impl BooleanBufferBuilder { } } + /// Gets a bit in the buffer at `index` #[inline] pub fn get_bit(&self, index: usize) -> bool { bit_util::get_bit(self.buffer.as_slice(), index) } + /// Returns true if empty #[inline] pub fn is_empty(&self) -> bool { self.len == 0 } + /// Returns the capacity of the buffer #[inline] pub fn capacity(&self) -> usize { self.buffer.capacity() * 8 } + /// Advances the buffer by `additional` bits #[inline] pub fn advance(&mut self, additional: usize) { let new_len = self.len + additional; @@ -99,6 +108,7 @@ impl BooleanBufferBuilder { self.len = len; } + /// Appends a boolean `v` into the buffer #[inline] pub fn append(&mut 
self, v: bool) { self.advance(1); @@ -107,6 +117,7 @@ impl BooleanBufferBuilder { } } + /// Appends n `additional` bits of value `v` into the buffer #[inline] pub fn append_n(&mut self, additional: usize, v: bool) { self.advance(additional); @@ -118,6 +129,7 @@ impl BooleanBufferBuilder { } } + /// Appends a slice of booleans into the buffer #[inline] pub fn append_slice(&mut self, slice: &[bool]) { let additional = slice.len(); @@ -156,6 +168,7 @@ impl BooleanBufferBuilder { self.buffer.as_slice() } + /// Creates a [`Buffer`] #[inline] pub fn finish(&mut self) -> Buffer { let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); diff --git a/arrow-array/src/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs index d3146366d51..d4eed0de9de 100644 --- a/arrow-array/src/builder/buffer_builder.rs +++ b/arrow-array/src/builder/buffer_builder.rs @@ -21,47 +21,78 @@ use std::marker::PhantomData; use crate::types::*; +/// Buffer builder for signed 8-bit integer type. pub type Int8BufferBuilder = BufferBuilder; +/// Buffer builder for signed 16-bit integer type. pub type Int16BufferBuilder = BufferBuilder; +/// Buffer builder for signed 32-bit integer type. pub type Int32BufferBuilder = BufferBuilder; +/// Buffer builder for signed 64-bit integer type. pub type Int64BufferBuilder = BufferBuilder; +/// Buffer builder for unsigned 8-bit integer type. pub type UInt8BufferBuilder = BufferBuilder; +/// Buffer builder for unsigned 16-bit integer type. pub type UInt16BufferBuilder = BufferBuilder; +/// Buffer builder for unsigned 32-bit integer type. pub type UInt32BufferBuilder = BufferBuilder; +/// Buffer builder for unsigned 64-bit integer type. pub type UInt64BufferBuilder = BufferBuilder; +/// Buffer builder for 32-bit floating point type. pub type Float32BufferBuilder = BufferBuilder; +/// Buffer builder for 64-bit floating point type. pub type Float64BufferBuilder = BufferBuilder; +/// Buffer builder for timestamp type of second unit. 
pub type TimestampSecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for timestamp type of millisecond unit. pub type TimestampMillisecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for timestamp type of microsecond unit. pub type TimestampMicrosecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for timestamp type of nanosecond unit. pub type TimestampNanosecondBufferBuilder = BufferBuilder<::Native>; + +/// Buffer builder for 32-bit date type. pub type Date32BufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for 64-bit date type. pub type Date64BufferBuilder = BufferBuilder<::Native>; + +/// Buffer builder for 32-bit elapsed time since midnight of second unit. pub type Time32SecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for 32-bit elapsed time since midnight of millisecond unit. pub type Time32MillisecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for 64-bit elapsed time since midnight of microsecond unit. pub type Time64MicrosecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for 64-bit elapsed time since midnight of nanosecond unit. pub type Time64NanosecondBufferBuilder = BufferBuilder<::Native>; + +/// Buffer builder for “calendar” interval in months. pub type IntervalYearMonthBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for “calendar” interval in days and milliseconds. pub type IntervalDayTimeBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for “calendar” interval in months, days, and nanoseconds. pub type IntervalMonthDayNanoBufferBuilder = BufferBuilder<::Native>; + +/// Buffer builder for elapsed time of second unit. pub type DurationSecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for elapsed time of milliseconds unit. pub type DurationMillisecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for elapsed time of microseconds unit. 
pub type DurationMicrosecondBufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for elapsed time of nanoseconds unit. pub type DurationNanosecondBufferBuilder = BufferBuilder<::Native>; @@ -124,6 +155,7 @@ impl BufferBuilder { } } + /// Creates a new builder from a [`MutableBuffer`] pub fn new_from_buffer(buffer: MutableBuffer) -> Self { let buffer_len = buffer.len(); Self { diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs index e9581922cca..4c8225adf15 100644 --- a/arrow-array/src/builder/fixed_size_binary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_builder.rs @@ -24,6 +24,22 @@ use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; +/// A fixed size binary array builder +/// ``` +/// use arrow_array::builder::FixedSizeBinaryBuilder; +/// use arrow_array::Array; +/// +/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5); +/// // [b"hello", null, b"arrow"] +/// builder.append_value(b"hello").unwrap(); +/// builder.append_null(); +/// builder.append_value(b"arrow").unwrap(); +/// +/// let array = builder.finish(); +/// assert_eq!(array.value(0), b"hello"); +/// assert!(array.is_null(1)); +/// assert_eq!(array.value(2), b"arrow"); +/// ``` #[derive(Debug)] pub struct FixedSizeBinaryBuilder { values_builder: UInt8BufferBuilder, diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 516c2292578..bc4ce466ac3 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -25,6 +25,44 @@ use std::any::Any; use std::sync::Arc; /// Array builder for [`FixedSizeListArray`] +/// ``` +/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array, Int32Array}; +/// let values_builder = Int32Builder::new(); +/// let mut builder = FixedSizeListBuilder::new(values_builder, 3); +/// +/// // [[0, 1, 2], null, 
[3, null, 5], [6, 7, null]] +/// builder.values().append_value(0); +/// builder.values().append_value(1); +/// builder.values().append_value(2); +/// builder.append(true); +/// builder.values().append_null(); +/// builder.values().append_null(); +/// builder.values().append_null(); +/// builder.append(false); +/// builder.values().append_value(3); +/// builder.values().append_null(); +/// builder.values().append_value(5); +/// builder.append(true); +/// builder.values().append_value(6); +/// builder.values().append_value(7); +/// builder.values().append_null(); +/// builder.append(true); +/// let list_array = builder.finish(); +/// assert_eq!( +/// *list_array.value(0), +/// Int32Array::from(vec![Some(0), Some(1), Some(2)]) +/// ); +/// assert!(list_array.is_null(1)); +/// assert_eq!( +/// *list_array.value(2), +/// Int32Array::from(vec![Some(3), None, Some(5)]) +/// ); +/// assert_eq!( +/// *list_array.value(3), +/// Int32Array::from(vec![Some(6), Some(7), None]) +/// ) +/// ``` +/// #[derive(Debug)] pub struct FixedSizeListBuilder { null_buffer_builder: NullBufferBuilder, @@ -104,6 +142,7 @@ where &mut self.values_builder } + /// Returns the length of the list pub fn value_length(&self) -> i32 { self.list_len } diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 5602f88636c..737b4fa72de 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -24,6 +24,43 @@ use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; use std::sync::Arc; +/// Creates a new `MapBuilder` +/// ``` +/// use arrow_array::builder::{MapBuilder, Int32Builder, StringBuilder}; +/// use arrow_array::{StringArray, Int32Array}; +/// use std::sync::Arc; +/// +/// let string_builder = StringBuilder::new(); +/// let int_builder = Int32Builder::with_capacity(4); +/// +/// let mut builder = MapBuilder::new(None, string_builder, int_builder); +/// +/// let string_builder = builder.keys(); +/// 
string_builder.append_value("joe"); +/// string_builder.append_null(); +/// string_builder.append_null(); +/// string_builder.append_value("mark"); +/// +/// let int_builder = builder.values(); +/// int_builder.append_value(1); +/// int_builder.append_value(2); +/// int_builder.append_null(); +/// int_builder.append_value(4); +/// +/// builder.append(true).unwrap(); +/// builder.append(false).unwrap(); +/// builder.append(true).unwrap(); +/// +/// let arr = builder.finish(); +/// assert_eq!( +/// *arr.values(), +/// Int32Array::from(vec![Some(1), Some(2), None, Some(4)]) +/// ); +/// assert_eq!( +/// *arr.keys(), +/// StringArray::from(vec![Some("joe"), None, None, Some("mark")]) +/// ); +/// ``` #[derive(Debug)] pub struct MapBuilder { offsets_builder: BufferBuilder, @@ -33,10 +70,14 @@ pub struct MapBuilder { value_builder: V, } +/// Contains details of the mapping #[derive(Debug, Clone)] pub struct MapFieldNames { + /// [`Field`] name for map entries pub entry: String, + /// [`Field`] name for map key pub key: String, + /// [`Field`] name for map value pub value: String, } @@ -52,6 +93,7 @@ impl Default for MapFieldNames { #[allow(dead_code)] impl MapBuilder { + /// Creates a new `MapBuilder` pub fn new( field_names: Option, key_builder: K, @@ -61,6 +103,7 @@ impl MapBuilder { Self::with_capacity(field_names, key_builder, value_builder, capacity) } + /// Creates a new `MapBuilder` with capacity pub fn with_capacity( field_names: Option, key_builder: K, @@ -79,10 +122,12 @@ impl MapBuilder { } } + /// Returns the key array builder of the map pub fn keys(&mut self) -> &mut K { &mut self.key_builder } + /// Returns the value array builder of the map pub fn values(&mut self) -> &mut V { &mut self.value_builder } @@ -104,6 +149,7 @@ impl MapBuilder { Ok(()) } + /// Builds the [`MapArray`] pub fn finish(&mut self) -> MapArray { let len = self.len(); @@ -144,6 +190,7 @@ impl MapBuilder { MapArray::from(array_data) } + /// Builds the [`MapArray`] without resetting the 
builder. pub fn finish_cloned(&self) -> MapArray { let len = self.len(); diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index eaf8243973b..3486e396b67 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -128,11 +128,17 @@ pub trait ArrayBuilder: Any + Send { fn into_box_any(self: Box) -> Box; } +/// A list array builder with i32 offsets pub type ListBuilder = GenericListBuilder; +/// A list array builder with i64 offsets pub type LargeListBuilder = GenericListBuilder; +/// A binary array builder with i32 offsets pub type BinaryBuilder = GenericBinaryBuilder; +/// A binary array builder with i64 offsets pub type LargeBinaryBuilder = GenericBinaryBuilder; +/// A string array builder with i32 offsets pub type StringBuilder = GenericStringBuilder; +/// A string array builder with i64 offsets pub type LargeStringBuilder = GenericStringBuilder; diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index 7a1fbafc76f..ef420dcbc29 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -24,36 +24,69 @@ use arrow_data::ArrayData; use std::any::Any; use std::sync::Arc; +/// A signed 8-bit integer array builder. pub type Int8Builder = PrimitiveBuilder; +/// A signed 16-bit integer array builder. pub type Int16Builder = PrimitiveBuilder; +/// A signed 32-bit integer array builder. pub type Int32Builder = PrimitiveBuilder; +/// A signed 64-bit integer array builder. pub type Int64Builder = PrimitiveBuilder; +/// An unsigned 8-bit integer array builder. pub type UInt8Builder = PrimitiveBuilder; +/// An unsigned 16-bit integer array builder. pub type UInt16Builder = PrimitiveBuilder; +/// An unsigned 32-bit integer array builder. pub type UInt32Builder = PrimitiveBuilder; +/// An unsigned 64-bit integer array builder. pub type UInt64Builder = PrimitiveBuilder; +/// A 32-bit floating point array builder. 
pub type Float32Builder = PrimitiveBuilder; +/// A 64-bit floating point array builder. pub type Float64Builder = PrimitiveBuilder; +/// A timestamp second array builder. pub type TimestampSecondBuilder = PrimitiveBuilder; +/// A timestamp millisecond array builder. pub type TimestampMillisecondBuilder = PrimitiveBuilder; +/// A timestamp microsecond array builder. pub type TimestampMicrosecondBuilder = PrimitiveBuilder; +/// A timestamp nanosecond array builder. pub type TimestampNanosecondBuilder = PrimitiveBuilder; + +/// A 32-bit date array builder. pub type Date32Builder = PrimitiveBuilder; +/// A 64-bit date array builder. pub type Date64Builder = PrimitiveBuilder; + +/// A 32-bit elapsed time in seconds array builder. pub type Time32SecondBuilder = PrimitiveBuilder; +/// A 32-bit elapsed time in milliseconds array builder. pub type Time32MillisecondBuilder = PrimitiveBuilder; +/// A 64-bit elapsed time in microseconds array builder. pub type Time64MicrosecondBuilder = PrimitiveBuilder; +/// A 64-bit elapsed time in nanoseconds array builder. pub type Time64NanosecondBuilder = PrimitiveBuilder; + +/// A “calendar” interval in months array builder. pub type IntervalYearMonthBuilder = PrimitiveBuilder; +/// A “calendar” interval in days and milliseconds array builder. pub type IntervalDayTimeBuilder = PrimitiveBuilder; +/// A “calendar” interval in months, days, and nanoseconds array builder. pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder; + +/// An elapsed time in seconds array builder. pub type DurationSecondBuilder = PrimitiveBuilder; +/// An elapsed time in milliseconds array builder. pub type DurationMillisecondBuilder = PrimitiveBuilder; +/// An elapsed time in microseconds array builder. pub type DurationMicrosecondBuilder = PrimitiveBuilder; +/// An elapsed time in nanoseconds array builder. 
pub type DurationNanosecondBuilder = PrimitiveBuilder; +/// A decimal 128 array builder pub type Decimal128Builder = PrimitiveBuilder; +/// A decimal 256 array builder pub type Decimal256Builder = PrimitiveBuilder; /// Array builder for fixed-width primitive types @@ -120,6 +153,7 @@ impl PrimitiveBuilder { } } + /// Creates a new primitive array builder from buffers pub fn new_from_buffer( values_buffer: MutableBuffer, null_buffer: Option, @@ -157,6 +191,7 @@ impl PrimitiveBuilder { self.values_builder.advance(1); } + /// Appends `n` no. of null's into the builder #[inline] pub fn append_nulls(&mut self, n: usize) { self.null_buffer_builder.append_n_nulls(n); diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index 5b8a7283528..4640902d870 100644 --- a/arrow-array/src/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -193,6 +193,7 @@ where Ok(key) } + /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { self.keys_builder.append_null() diff --git a/arrow-array/src/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs index f44756b6bcc..878cfc72763 100644 --- a/arrow-array/src/builder/string_dictionary_builder.rs +++ b/arrow-array/src/builder/string_dictionary_builder.rs @@ -270,6 +270,7 @@ where Ok(key) } + /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { self.keys_builder.append_null() diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 98d0e1a1d27..12bcaf0944e 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -174,6 +174,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box, field_builders: Vec>) -> Self { Self { fields, @@ -182,6 +183,7 @@ impl StructBuilder { } } + /// Creates a new `StructBuilder` from vector 
of [`Field`] with `capacity` pub fn from_fields(fields: Vec, capacity: usize) -> Self { let mut builders = Vec::with_capacity(fields.len()); for field in &fields { diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs index 351f90bacfc..e7c5e8367e2 100644 --- a/arrow-array/src/iterator.rs +++ b/arrow-array/src/iterator.rs @@ -116,10 +116,15 @@ impl ExactSizeIterator for ArrayIter {} /// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray pub type PrimitiveIter<'a, T> = ArrayIter<&'a PrimitiveArray>; +/// an iterator that returns Some(T) or None, that can be used on any BooleanArray pub type BooleanIter<'a> = ArrayIter<&'a BooleanArray>; +/// an iterator that returns Some(T) or None, that can be used on any Utf8Array pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray>; +/// an iterator that returns Some(T) or None, that can be used on any BinaryArray pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray>; +/// an iterator that returns Some(T) or None, that can be used on any FixedSizeBinaryArray pub type FixedSizeBinaryIter<'a> = ArrayIter<&'a FixedSizeBinaryArray>; +/// an iterator that returns Some(T) or None, that can be used on any ListArray pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray>; #[cfg(test)] diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs index 15267d3080e..5fcd1f33d48 100644 --- a/arrow-array/src/lib.rs +++ b/arrow-array/src/lib.rs @@ -158,6 +158,9 @@ //! [`json`]: https://docs.rs/arrow/latest/arrow/json/index.html //! 
[`csv`]: https://docs.rs/arrow/latest/arrow/csv/index.html +#![deny(rustdoc::broken_intra_doc_links)] +#![warn(missing_docs)] + pub mod array; pub use array::*; diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 6f2385fa9b4..ea0eb385358 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -417,6 +417,7 @@ pub struct RecordBatchOptions { } impl RecordBatchOptions { + /// Creates a new `RecordBatchOptions` pub fn new() -> Self { Self { match_field_names: true, diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 13194d61f01..0646a7f29da 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -34,16 +34,19 @@ use std::ops::{Add, Sub}; // BooleanType is special: its bit-width is not the size of the primitive type, and its `index` // operation assumes bit-packing. +/// A boolean datatype #[derive(Debug)] pub struct BooleanType {} impl BooleanType { + /// The Arrow [`DataType`] this type represents pub const DATA_TYPE: DataType = DataType::Boolean; } macro_rules! make_type { - ($name:ident, $native_ty:ty, $data_ty:expr) => { + ($name:ident, $native_ty:ty, $data_ty:expr, $doc_string: literal) => { #[derive(Debug)] + #[doc = $doc_string] pub struct $name {} impl ArrowPrimitiveType for $name { @@ -53,89 +56,168 @@ macro_rules! 
make_type { }; } -make_type!(Int8Type, i8, DataType::Int8); -make_type!(Int16Type, i16, DataType::Int16); -make_type!(Int32Type, i32, DataType::Int32); -make_type!(Int64Type, i64, DataType::Int64); -make_type!(UInt8Type, u8, DataType::UInt8); -make_type!(UInt16Type, u16, DataType::UInt16); -make_type!(UInt32Type, u32, DataType::UInt32); -make_type!(UInt64Type, u64, DataType::UInt64); -make_type!(Float16Type, f16, DataType::Float16); -make_type!(Float32Type, f32, DataType::Float32); -make_type!(Float64Type, f64, DataType::Float64); +make_type!(Int8Type, i8, DataType::Int8, "A signed 8-bit integer type."); +make_type!( + Int16Type, + i16, + DataType::Int16, + "A signed 16-bit integer type." +); +make_type!( + Int32Type, + i32, + DataType::Int32, + "A signed 32-bit integer type." +); +make_type!( + Int64Type, + i64, + DataType::Int64, + "A signed 64-bit integer type." +); +make_type!( + UInt8Type, + u8, + DataType::UInt8, + "An unsigned 8-bit integer type." +); +make_type!( + UInt16Type, + u16, + DataType::UInt16, + "An unsigned 16-bit integer type." +); +make_type!( + UInt32Type, + u32, + DataType::UInt32, + "An unsigned 32-bit integer type." +); +make_type!( + UInt64Type, + u64, + DataType::UInt64, + "An unsigned 64-bit integer type." +); +make_type!( + Float16Type, + f16, + DataType::Float16, + "A 16-bit floating point number type." +); +make_type!( + Float32Type, + f32, + DataType::Float32, + "A 32-bit floating point number type." +); +make_type!( + Float64Type, + f64, + DataType::Float64, + "A 64-bit floating point number type." +); make_type!( TimestampSecondType, i64, - DataType::Timestamp(TimeUnit::Second, None) + DataType::Timestamp(TimeUnit::Second, None), + "A timestamp second type with an optional timezone." ); make_type!( TimestampMillisecondType, i64, - DataType::Timestamp(TimeUnit::Millisecond, None) + DataType::Timestamp(TimeUnit::Millisecond, None), + "A timestamp millisecond type with an optional timezone." 
); make_type!( TimestampMicrosecondType, i64, - DataType::Timestamp(TimeUnit::Microsecond, None) + DataType::Timestamp(TimeUnit::Microsecond, None), + "A timestamp microsecond type with an optional timezone." ); make_type!( TimestampNanosecondType, i64, - DataType::Timestamp(TimeUnit::Nanosecond, None) + DataType::Timestamp(TimeUnit::Nanosecond, None), + "A timestamp nanosecond type with an optional timezone." +); +make_type!( + Date32Type, + i32, + DataType::Date32, + "A 32-bit date type representing the elapsed time since UNIX epoch in days (32 bits)." +); +make_type!( + Date64Type, + i64, + DataType::Date64, + "A 64-bit date type representing the elapsed time since UNIX epoch in milliseconds (64 bits)." +); +make_type!( + Time32SecondType, + i32, + DataType::Time32(TimeUnit::Second), + "A 32-bit time type representing the elapsed time since midnight in seconds." ); -make_type!(Date32Type, i32, DataType::Date32); -make_type!(Date64Type, i64, DataType::Date64); -make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second)); make_type!( Time32MillisecondType, i32, - DataType::Time32(TimeUnit::Millisecond) + DataType::Time32(TimeUnit::Millisecond), + "A 32-bit time type representing the elapsed time since midnight in milliseconds." ); make_type!( Time64MicrosecondType, i64, - DataType::Time64(TimeUnit::Microsecond) + DataType::Time64(TimeUnit::Microsecond), + "A 64-bit time type representing the elapsed time since midnight in microseconds." ); make_type!( Time64NanosecondType, i64, - DataType::Time64(TimeUnit::Nanosecond) + DataType::Time64(TimeUnit::Nanosecond), + "A 64-bit time type representing the elapsed time since midnight in nanoseconds." ); make_type!( IntervalYearMonthType, i32, - DataType::Interval(IntervalUnit::YearMonth) + DataType::Interval(IntervalUnit::YearMonth), + "A “calendar” interval type in months." 
); make_type!( IntervalDayTimeType, i64, - DataType::Interval(IntervalUnit::DayTime) + DataType::Interval(IntervalUnit::DayTime), + "A “calendar” interval type in days and milliseconds." ); make_type!( IntervalMonthDayNanoType, i128, - DataType::Interval(IntervalUnit::MonthDayNano) + DataType::Interval(IntervalUnit::MonthDayNano), + "A “calendar” interval type in months, days, and nanoseconds." ); make_type!( DurationSecondType, i64, - DataType::Duration(TimeUnit::Second) + DataType::Duration(TimeUnit::Second), + "An elapsed time type in seconds." ); make_type!( DurationMillisecondType, i64, - DataType::Duration(TimeUnit::Millisecond) + DataType::Duration(TimeUnit::Millisecond), + "An elapsed time type in milliseconds." ); make_type!( DurationMicrosecondType, i64, - DataType::Duration(TimeUnit::Microsecond) + DataType::Duration(TimeUnit::Microsecond), + "An elapsed time type in microseconds." ); make_type!( DurationNanosecondType, i64, - DataType::Duration(TimeUnit::Nanosecond) + DataType::Duration(TimeUnit::Nanosecond), + "An elapsed time type in nanoseconds." ); /// A subtype of primitive type that represents legal dictionary keys. 
@@ -489,10 +571,15 @@ mod decimal { pub trait DecimalType: 'static + Send + Sync + ArrowPrimitiveType + decimal::DecimalTypeSealed { + /// Width of the type const BYTE_LENGTH: usize; + /// Maximum number of significant digits const MAX_PRECISION: u8; + /// Maximum number of digits after the decimal point (note the scale can be negative) const MAX_SCALE: i8; + /// Function to create its [`DataType`] const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType; + /// Default values for [`DataType`] const DEFAULT_TYPE: DataType; /// "Decimal128" or "Decimal256", for use in error messages @@ -621,10 +708,15 @@ pub(crate) mod bytes { /// /// See [Variable Size Binary Layout](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout) pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed { + /// Type of offset i.e. i32/i64 type Offset: OffsetSizeTrait; + /// Type for representing its equivalent Rust type, i.e. + /// Utf8Array will have native type as &str + /// BinaryArray will have type as [u8] type Native: bytes::ByteArrayNativeType + AsRef<[u8]> + ?Sized; /// "Binary" or "String", for use in error messages const PREFIX: &'static str; + /// Datatype of array elements const DATA_TYPE: DataType; } @@ -645,7 +737,9 @@ impl ByteArrayType for GenericStringType { }; } +/// An arrow utf8 array with i32 offsets pub type Utf8Type = GenericStringType; +/// An arrow utf8 array with i64 offsets pub type LargeUtf8Type = GenericStringType; /// [`ByteArrayType`] for binary arrays @@ -665,7 +759,9 @@ impl ByteArrayType for GenericBinaryType { }; } +/// An arrow binary array with i32 offsets pub type BinaryType = GenericBinaryType; +/// An arrow binary array with i64 offsets pub type LargeBinaryType = GenericBinaryType; #[cfg(test)] diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs index 21f96d90a5d..0f1c0064f5a 100644 --- a/arrow-json/src/lib.rs +++ b/arrow-json/src/lib.rs @@ -19,6 +19,9 @@ //! line-delimited records. 
See the module level documentation for the //! [`reader`] and [`writer`] for usage examples. +#![deny(rustdoc::broken_intra_doc_links)] +#![warn(missing_docs)] + pub mod reader; pub mod writer; @@ -30,6 +33,7 @@ use serde_json::{Number, Value}; /// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.). pub trait JsonSerializable: 'static { + /// Converts self into json value if its possible fn into_json_value(self) -> Option; } diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs index 646d9c0d197..0d3148c5a05 100644 --- a/arrow-json/src/reader.rs +++ b/arrow-json/src/reader.rs @@ -198,6 +198,7 @@ pub struct ValueIter<'a, R: Read> { } impl<'a, R: Read> ValueIter<'a, R> { + /// Creates a new `ValueIter` pub fn new(reader: &'a mut BufReader, max_read_records: Option) -> Self { Self { reader, @@ -613,6 +614,7 @@ impl Default for DecoderOptions { } impl DecoderOptions { + /// Creates a new `DecoderOptions` pub fn new() -> Self { Default::default() }