Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Doc improvements #3155

Merged
merged 9 commits into from Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 22 additions & 0 deletions arrow-array/src/arithmetic.rs
Expand Up @@ -45,60 +45,82 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {
/// The multiplicative identity
const ONE: Self;

/// Checked addition operation
fn add_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping addition operation
fn add_wrapping(self, rhs: Self) -> Self;

/// Checked subtraction operation
fn sub_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping subtraction operation
fn sub_wrapping(self, rhs: Self) -> Self;

/// Checked multiplication operation
fn mul_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping multiplication operation
fn mul_wrapping(self, rhs: Self) -> Self;

/// Checked division operation
fn div_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping division operation
fn div_wrapping(self, rhs: Self) -> Self;

/// Checked remainder operation
fn mod_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping remainder operation
fn mod_wrapping(self, rhs: Self) -> Self;

/// Checked negation operation
fn neg_checked(self) -> Result<Self, ArrowError>;

/// Wrapping negation operation
fn neg_wrapping(self) -> Self;

/// Checked exponentiation operation
fn pow_checked(self, exp: u32) -> Result<Self, ArrowError>;

/// Wrapping exponentiation operation
fn pow_wrapping(self, exp: u32) -> Self;

/// Returns true if zero else false
fn is_zero(self) -> bool;

/// Compare operation
fn compare(self, rhs: Self) -> Ordering;

/// Equality operation
fn is_eq(self, rhs: Self) -> bool;

/// Inequality test: the logical negation of [`Self::is_eq`]
#[inline]
fn is_ne(self, rhs: Self) -> bool {
    let equal = self.is_eq(rhs);
    !equal
}

/// Less than operation
#[inline]
fn is_lt(self, rhs: Self) -> bool {
self.compare(rhs).is_lt()
}

/// Less than equals operation
#[inline]
fn is_le(self, rhs: Self) -> bool {
self.compare(rhs).is_le()
}

/// Greater than operation
#[inline]
fn is_gt(self, rhs: Self) -> bool {
self.compare(rhs).is_gt()
}

/// Greater than equals operation
#[inline]
fn is_ge(self, rhs: Self) -> bool {
self.compare(rhs).is_ge()
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/boolean_array.rs
Expand Up @@ -91,7 +91,7 @@ impl BooleanArray {
self.data.is_empty()
}

// Returns a new boolean array builder
/// Returns a new [`BooleanBuilder`], created via `BooleanBuilder::with_capacity`
/// with the given `capacity`
pub fn builder(capacity: usize) -> BooleanBuilder {
BooleanBuilder::with_capacity(capacity)
}
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/list_array.rs
Expand Up @@ -29,7 +29,9 @@ use std::any::Any;

/// Trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
/// True for 64-bit offset size and false for 32-bit offset size
const IS_LARGE: bool;
/// Prefix for the offset size
const PREFIX: &'static str;
}

Expand Down
1 change: 1 addition & 0 deletions arrow-array/src/array/mod.rs
Expand Up @@ -382,6 +382,7 @@ impl<'a, T: Array> Array for &'a T {
/// The value at null indexes is unspecified, and implementations must not rely on a specific
/// value such as [`Default::default`] being returned, however, it must not be undefined
pub trait ArrayAccessor: Array {
/// The Arrow type of the element being accessed.
type Item: Send + Sync;

/// Returns the element at index `i`
Expand Down
30 changes: 29 additions & 1 deletion arrow-array/src/array/primitive_array.rs
Expand Up @@ -165,21 +165,48 @@ pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
/// A primitive array where each element is of type `TimestampNanosecondType.`
/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray)
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;

// TODO: give examples for the below types

/// A primitive array where each element is a 32-bit date ([`Date32Type`]).
pub type Date32Array = PrimitiveArray<Date32Type>;
/// A primitive array where each element is a 64-bit date ([`Date64Type`]).
pub type Date64Array = PrimitiveArray<Date64Type>;

/// An array where each element is a 32-bit value representing the time elapsed, in seconds,
/// since midnight.
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
/// An array where each element is a 32-bit value representing the time elapsed, in milliseconds,
/// since midnight.
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
/// An array where each element is a 64-bit value representing the time elapsed, in microseconds,
/// since midnight.
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
/// An array where each element is a 64-bit value representing the time elapsed, in nanoseconds,
/// since midnight.
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;

/// An array where each element is a "calendar" interval in months.
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
/// An array where each element is a "calendar" interval in days and milliseconds.
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
/// An array where each element is a "calendar" interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;

/// An array where each element is an elapsed time in seconds.
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
/// An array where each element is an elapsed time in milliseconds.
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
/// An array where each element is an elapsed time in microseconds.
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
/// An array where each element is an elapsed time in nanoseconds.
pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;

/// An array where each element is a 128-bit decimal with precision in [1, 38] and
/// scale in [-38, 38].
pub type Decimal128Array = PrimitiveArray<Decimal128Type>;
/// An array where each element is a 256-bit decimal with precision in [1, 76] and
/// scale in [-76, 76].
pub type Decimal256Array = PrimitiveArray<Decimal256Type>;

/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
Expand Down Expand Up @@ -256,7 +283,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
}
}

// Returns a new primitive array builder
/// Returns a new [`PrimitiveBuilder`] for `T`, created via
/// `PrimitiveBuilder::with_capacity` with the given `capacity`
pub fn builder(capacity: usize) -> PrimitiveBuilder<T> {
PrimitiveBuilder::<T>::with_capacity(capacity)
}
Expand Down Expand Up @@ -749,6 +776,7 @@ impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
/// the type can be collected to `PrimitiveArray`.
#[derive(Debug)]
pub struct NativeAdapter<T: ArrowPrimitiveType> {
/// Corresponding Rust native type if available
pub native: Option<T::Native>,
}

Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/builder/boolean_buffer_builder.rs
Expand Up @@ -19,30 +19,35 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;

/// A builder for creating a boolean [`Buffer`]
#[derive(Debug)]
pub struct BooleanBufferBuilder {
/// Underlying byte buffer that stores the bits
buffer: MutableBuffer,
/// Length of the builder, counted in bits
len: usize,
}

impl BooleanBufferBuilder {
/// Creates an empty `BooleanBufferBuilder` whose backing [`MutableBuffer`] is
/// allocated with `capacity` bits rounded up to whole bytes
#[inline]
pub fn new(capacity: usize) -> Self {
    Self {
        buffer: MutableBuffer::new(bit_util::ceil(capacity, 8)),
        len: 0,
    }
}

/// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
///
/// `len` is the length in bits that the builder starts with.
///
/// # Panics
///
/// Panics if `len` is greater than `buffer.len() * 8`.
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
assert!(len <= buffer.len() * 8);
Self { buffer, len }
}

/// Returns the length of the builder, counted in bits
#[inline]
pub fn len(&self) -> usize {
self.len
}

/// Sets a bit in the buffer at `index`
#[inline]
pub fn set_bit(&mut self, index: usize, v: bool) {
if v {
Expand All @@ -52,21 +57,25 @@ impl BooleanBufferBuilder {
}
}

/// Returns the value of the bit at `index`, read from the underlying buffer
/// with `bit_util::get_bit`
#[inline]
pub fn get_bit(&self, index: usize) -> bool {
bit_util::get_bit(self.buffer.as_slice(), index)
}

/// Returns true if the builder's length is zero
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}

/// Returns the capacity of the builder: the capacity reported by the underlying
/// [`MutableBuffer`] multiplied by 8
#[inline]
pub fn capacity(&self) -> usize {
self.buffer.capacity() * 8
}

/// Advances the buffer by `additional` bits
#[inline]
pub fn advance(&mut self, additional: usize) {
let new_len = self.len + additional;
Expand Down Expand Up @@ -99,6 +108,7 @@ impl BooleanBufferBuilder {
self.len = len;
}

/// Appends a boolean `v` into the buffer
#[inline]
pub fn append(&mut self, v: bool) {
self.advance(1);
Expand All @@ -107,6 +117,7 @@ impl BooleanBufferBuilder {
}
}

/// Appends n `additional` bits of value `v` into the buffer
#[inline]
pub fn append_n(&mut self, additional: usize, v: bool) {
self.advance(additional);
Expand All @@ -118,6 +129,7 @@ impl BooleanBufferBuilder {
}
}

/// Appends a slice of booleans into the buffer
#[inline]
pub fn append_slice(&mut self, slice: &[bool]) {
let additional = slice.len();
Expand Down Expand Up @@ -156,6 +168,7 @@ impl BooleanBufferBuilder {
self.buffer.as_slice()
}

/// Creates a [`Buffer`]
#[inline]
pub fn finish(&mut self) -> Buffer {
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
Expand Down
32 changes: 32 additions & 0 deletions arrow-array/src/builder/buffer_builder.rs
Expand Up @@ -21,47 +21,78 @@ use std::marker::PhantomData;

use crate::types::*;

/// Buffer builder for signed 8-bit integer type.
pub type Int8BufferBuilder = BufferBuilder<i8>;
/// Buffer builder for signed 16-bit integer type.
pub type Int16BufferBuilder = BufferBuilder<i16>;
/// Buffer builder for signed 32-bit integer type.
pub type Int32BufferBuilder = BufferBuilder<i32>;
/// Buffer builder for signed 64-bit integer type.
pub type Int64BufferBuilder = BufferBuilder<i64>;
/// Buffer builder for unsigned 8-bit integer type.
pub type UInt8BufferBuilder = BufferBuilder<u8>;
/// Buffer builder for unsigned 16-bit integer type.
pub type UInt16BufferBuilder = BufferBuilder<u16>;
/// Buffer builder for unsigned 32-bit integer type.
pub type UInt32BufferBuilder = BufferBuilder<u32>;
/// Buffer builder for unsigned 64-bit integer type.
pub type UInt64BufferBuilder = BufferBuilder<u64>;
/// Buffer builder for 32-bit floating point type.
pub type Float32BufferBuilder = BufferBuilder<f32>;
/// Buffer builder for 64-bit floating point type.
pub type Float64BufferBuilder = BufferBuilder<f64>;

/// Buffer builder for the native type of [`TimestampSecondType`] (timestamp in seconds).
pub type TimestampSecondBufferBuilder =
BufferBuilder<<TimestampSecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for the native type of [`TimestampMillisecondType`] (timestamp in milliseconds).
pub type TimestampMillisecondBufferBuilder =
BufferBuilder<<TimestampMillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for the native type of [`TimestampMicrosecondType`] (timestamp in microseconds).
pub type TimestampMicrosecondBufferBuilder =
BufferBuilder<<TimestampMicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for the native type of [`TimestampNanosecondType`] (timestamp in nanoseconds).
pub type TimestampNanosecondBufferBuilder =
BufferBuilder<<TimestampNanosecondType as ArrowPrimitiveType>::Native>;

/// Buffer builder for the native type of the 32-bit date type ([`Date32Type`]).
pub type Date32BufferBuilder = BufferBuilder<<Date32Type as ArrowPrimitiveType>::Native>;
/// Buffer builder for the native type of the 64-bit date type ([`Date64Type`]).
pub type Date64BufferBuilder = BufferBuilder<<Date64Type as ArrowPrimitiveType>::Native>;

/// Buffer builder for 32-bit elapsed time since midnight, in seconds.
pub type Time32SecondBufferBuilder =
BufferBuilder<<Time32SecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 32-bit elapsed time since midnight, in milliseconds.
pub type Time32MillisecondBufferBuilder =
BufferBuilder<<Time32MillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 64-bit elapsed time since midnight, in microseconds.
pub type Time64MicrosecondBufferBuilder =
BufferBuilder<<Time64MicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 64-bit elapsed time since midnight, in nanoseconds.
pub type Time64NanosecondBufferBuilder =
BufferBuilder<<Time64NanosecondType as ArrowPrimitiveType>::Native>;

/// Buffer builder for "calendar" interval in months.
pub type IntervalYearMonthBufferBuilder =
BufferBuilder<<IntervalYearMonthType as ArrowPrimitiveType>::Native>;
/// Buffer builder for "calendar" interval in days and milliseconds.
pub type IntervalDayTimeBufferBuilder =
BufferBuilder<<IntervalDayTimeType as ArrowPrimitiveType>::Native>;
/// Buffer builder for "calendar" interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoBufferBuilder =
BufferBuilder<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native>;

/// Buffer builder for elapsed time in seconds.
pub type DurationSecondBufferBuilder =
BufferBuilder<<DurationSecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time in milliseconds.
pub type DurationMillisecondBufferBuilder =
BufferBuilder<<DurationMillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time in microseconds.
pub type DurationMicrosecondBufferBuilder =
BufferBuilder<<DurationMicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time in nanoseconds.
pub type DurationNanosecondBufferBuilder =
BufferBuilder<<DurationNanosecondType as ArrowPrimitiveType>::Native>;

Expand Down Expand Up @@ -124,6 +155,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
}
}

/// Creates a new builder from a [`MutableBuffer`]
pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
let buffer_len = buffer.len();
Self {
Expand Down
16 changes: 16 additions & 0 deletions arrow-array/src/builder/fixed_size_binary_builder.rs
Expand Up @@ -24,6 +24,22 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

/// A fixed size binary array builder
/// ```
/// use arrow_array::builder::FixedSizeBinaryBuilder;
/// use arrow_array::Array;
///
/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
/// // [b"hello", null, b"arrow"]
/// builder.append_value(b"hello").unwrap();
/// builder.append_null();
/// builder.append_value(b"arrow").unwrap();
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), b"hello");
/// assert!(array.is_null(1));
/// assert_eq!(array.value(2), b"arrow");
/// ```
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
values_builder: UInt8BufferBuilder,
Expand Down