Skip to content

Commit

Permalink
Doc improvements (#3155)
Browse files Browse the repository at this point in the history
* Improving arrow-json docs

* Improving arrow-array docs

* Fix tests

* Fix typos

* Incorporate review comments

* Improve doc for fixed_size_list_builder

* Fix doc comments
  • Loading branch information
psvri committed Nov 24, 2022
1 parent 007fb4c commit 2460c7b
Show file tree
Hide file tree
Showing 21 changed files with 387 additions and 31 deletions.
22 changes: 22 additions & 0 deletions arrow-array/src/arithmetic.rs
Expand Up @@ -45,60 +45,82 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {
/// The multiplicative identity
const ONE: Self;

/// Checked addition operation
fn add_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping addition operation
fn add_wrapping(self, rhs: Self) -> Self;

/// Checked subtraction operation
fn sub_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping subtraction operation
fn sub_wrapping(self, rhs: Self) -> Self;

/// Checked multiplication operation
fn mul_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping multiplication operation
fn mul_wrapping(self, rhs: Self) -> Self;

/// Checked division operation
fn div_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping division operation
fn div_wrapping(self, rhs: Self) -> Self;

/// Checked remainder operation
fn mod_checked(self, rhs: Self) -> Result<Self, ArrowError>;

/// Wrapping remainder operation
fn mod_wrapping(self, rhs: Self) -> Self;

/// Checked negation operation
fn neg_checked(self) -> Result<Self, ArrowError>;

/// Wrapping negation operation
fn neg_wrapping(self) -> Self;

/// Checked exponentiation operation
fn pow_checked(self, exp: u32) -> Result<Self, ArrowError>;

/// Wrapping exponentiation operation
fn pow_wrapping(self, exp: u32) -> Self;

/// Returns true if zero else false
fn is_zero(self) -> bool;

/// Compare operation
fn compare(self, rhs: Self) -> Ordering;

/// Equality operation
fn is_eq(self, rhs: Self) -> bool;

/// Not equal operation
///
/// The default implementation is the logical negation of [`Self::is_eq`].
#[inline]
fn is_ne(self, rhs: Self) -> bool {
!self.is_eq(rhs)
}

/// Less than operation
#[inline]
fn is_lt(self, rhs: Self) -> bool {
self.compare(rhs).is_lt()
}

/// Less than equals operation
#[inline]
fn is_le(self, rhs: Self) -> bool {
self.compare(rhs).is_le()
}

/// Greater than operation
#[inline]
fn is_gt(self, rhs: Self) -> bool {
self.compare(rhs).is_gt()
}

/// Greater than equals operation
#[inline]
fn is_ge(self, rhs: Self) -> bool {
self.compare(rhs).is_ge()
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/boolean_array.rs
Expand Up @@ -91,7 +91,7 @@ impl BooleanArray {
self.data.is_empty()
}

// Returns a new boolean array builder
/// Returns a new [`BooleanBuilder`] created via `BooleanBuilder::with_capacity(capacity)`
pub fn builder(capacity: usize) -> BooleanBuilder {
BooleanBuilder::with_capacity(capacity)
}
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/list_array.rs
Expand Up @@ -29,7 +29,9 @@ use std::any::Any;

/// Trait declaring an offset size, relevant for `i32` vs `i64` array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
/// `true` for a 64-bit offset size and `false` for a 32-bit offset size
const IS_LARGE: bool;
/// Prefix for the offset size
const PREFIX: &'static str;
}

Expand Down
1 change: 1 addition & 0 deletions arrow-array/src/array/mod.rs
Expand Up @@ -382,6 +382,7 @@ impl<'a, T: Array> Array for &'a T {
/// The value at null indexes is unspecified, and implementations must not rely on a specific
/// value such as [`Default::default`] being returned, however, it must not be undefined
pub trait ArrayAccessor: Array {
/// The Arrow type of the element being accessed.
type Item: Send + Sync;

/// Returns the element at index `i`
Expand Down
30 changes: 29 additions & 1 deletion arrow-array/src/array/primitive_array.rs
Expand Up @@ -165,21 +165,48 @@ pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
/// A primitive array where each element is of type `TimestampNanosecondType`.
/// See examples for [`TimestampSecondArray`](crate::array::TimestampSecondArray).
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;

// TODO: give examples for the below types

/// A primitive array where each element is of 32-bit date type.
pub type Date32Array = PrimitiveArray<Date32Type>;
/// A primitive array where each element is of 64-bit date type.
pub type Date64Array = PrimitiveArray<Date64Type>;

/// An array where each element is of 32-bit type representing time elapsed in seconds
/// since midnight.
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
/// An array where each element is of 32-bit type representing time elapsed in milliseconds
/// since midnight.
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
/// An array where each element is of 64-bit type representing time elapsed in microseconds
/// since midnight.
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
/// An array where each element is of 64-bit type representing time elapsed in nanoseconds
/// since midnight.
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;

/// An array where each element is a “calendar” interval in months.
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
/// An array where each element is a “calendar” interval in days and milliseconds.
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
/// An array where each element is a “calendar” interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;

/// An array where each element is an elapsed time type in seconds.
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
/// An array where each element is an elapsed time type in milliseconds.
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
/// An array where each element is an elapsed time type in microseconds.
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
/// An array where each element is an elapsed time type in nanoseconds.
pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;

/// An array where each element is a 128-bit decimal with precision in [1, 38] and
/// scale in [-38, 38].
pub type Decimal128Array = PrimitiveArray<Decimal128Type>;
/// An array where each element is a 256-bit decimal with precision in [1, 76] and
/// scale in [-76, 76].
pub type Decimal256Array = PrimitiveArray<Decimal256Type>;

/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
Expand Down Expand Up @@ -256,7 +283,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
}
}

// Returns a new primitive array builder
/// Returns a new [`PrimitiveBuilder`] created via `PrimitiveBuilder::<T>::with_capacity(capacity)`
pub fn builder(capacity: usize) -> PrimitiveBuilder<T> {
PrimitiveBuilder::<T>::with_capacity(capacity)
}
Expand Down Expand Up @@ -749,6 +776,7 @@ impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
/// the type can be collected to `PrimitiveArray`.
#[derive(Debug)]
pub struct NativeAdapter<T: ArrowPrimitiveType> {
/// Corresponding Rust native type if available
pub native: Option<T::Native>,
}

Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/builder/boolean_buffer_builder.rs
Expand Up @@ -19,30 +19,35 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;

/// A builder for creating a boolean [`Buffer`]
///
/// Booleans are stored as individual bits inside a [`MutableBuffer`].
#[derive(Debug)]
pub struct BooleanBufferBuilder {
/// Backing storage holding the bits
buffer: MutableBuffer,
/// Number of bits currently held by the builder
len: usize,
}

impl BooleanBufferBuilder {
/// Creates a new, empty `BooleanBufferBuilder`
///
/// The backing [`MutableBuffer`] is created with `capacity` bits rounded up
/// to a whole number of bytes; the builder's length starts at zero.
#[inline]
pub fn new(capacity: usize) -> Self {
    Self {
        buffer: MutableBuffer::new(bit_util::ceil(capacity, 8)),
        len: 0,
    }
}

/// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
///
/// `len` is the number of bits of `buffer` that the builder treats as its contents.
///
/// # Panics
///
/// Panics if `len` is greater than `buffer.len() * 8`.
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
// `len` counts bits; it must fit in `buffer` (assuming `buffer.len()` is in bytes)
assert!(len <= buffer.len() * 8);
Self { buffer, len }
}

/// Returns the length of the buffer, i.e. the number of bits held by the builder
#[inline]
pub fn len(&self) -> usize {
self.len
}

/// Sets a bit in the buffer at `index`
#[inline]
pub fn set_bit(&mut self, index: usize, v: bool) {
if v {
Expand All @@ -52,21 +57,25 @@ impl BooleanBufferBuilder {
}
}

/// Gets a bit in the buffer at `index`
///
/// `index` is forwarded unchanged to `bit_util::get_bit` on the underlying
/// bytes; it is not compared against [`Self::len`] in this method.
#[inline]
pub fn get_bit(&self, index: usize) -> bool {
bit_util::get_bit(self.buffer.as_slice(), index)
}

/// Returns `true` if the builder holds no bits, i.e. its length is zero
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}

/// Returns the capacity of the buffer in bits
///
/// Computed as the underlying buffer's capacity multiplied by 8
/// (assuming `MutableBuffer::capacity` reports bytes).
#[inline]
pub fn capacity(&self) -> usize {
self.buffer.capacity() * 8
}

/// Advances the buffer by `additional` bits
#[inline]
pub fn advance(&mut self, additional: usize) {
let new_len = self.len + additional;
Expand Down Expand Up @@ -99,6 +108,7 @@ impl BooleanBufferBuilder {
self.len = len;
}

/// Appends a boolean `v` into the buffer
#[inline]
pub fn append(&mut self, v: bool) {
self.advance(1);
Expand All @@ -107,6 +117,7 @@ impl BooleanBufferBuilder {
}
}

/// Appends n `additional` bits of value `v` into the buffer
#[inline]
pub fn append_n(&mut self, additional: usize, v: bool) {
self.advance(additional);
Expand All @@ -118,6 +129,7 @@ impl BooleanBufferBuilder {
}
}

/// Appends a slice of booleans into the buffer
#[inline]
pub fn append_slice(&mut self, slice: &[bool]) {
let additional = slice.len();
Expand Down Expand Up @@ -156,6 +168,7 @@ impl BooleanBufferBuilder {
self.buffer.as_slice()
}

/// Creates a [`Buffer`]
#[inline]
pub fn finish(&mut self) -> Buffer {
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
Expand Down
32 changes: 32 additions & 0 deletions arrow-array/src/builder/buffer_builder.rs
Expand Up @@ -21,47 +21,78 @@ use std::marker::PhantomData;

use crate::types::*;

/// Buffer builder for signed 8-bit integer type.
pub type Int8BufferBuilder = BufferBuilder<i8>;
/// Buffer builder for signed 16-bit integer type.
pub type Int16BufferBuilder = BufferBuilder<i16>;
/// Buffer builder for signed 32-bit integer type.
pub type Int32BufferBuilder = BufferBuilder<i32>;
/// Buffer builder for signed 64-bit integer type.
pub type Int64BufferBuilder = BufferBuilder<i64>;
/// Buffer builder for unsigned 8-bit integer type.
pub type UInt8BufferBuilder = BufferBuilder<u8>;
/// Buffer builder for unsigned 16-bit integer type.
pub type UInt16BufferBuilder = BufferBuilder<u16>;
/// Buffer builder for unsigned 32-bit integer type.
pub type UInt32BufferBuilder = BufferBuilder<u32>;
/// Buffer builder for unsigned 64-bit integer type.
pub type UInt64BufferBuilder = BufferBuilder<u64>;
/// Buffer builder for 32-bit floating point type.
pub type Float32BufferBuilder = BufferBuilder<f32>;
/// Buffer builder for 64-bit floating point type.
pub type Float64BufferBuilder = BufferBuilder<f64>;

/// Buffer builder for timestamp type of second unit.
pub type TimestampSecondBufferBuilder =
BufferBuilder<<TimestampSecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for timestamp type of millisecond unit.
pub type TimestampMillisecondBufferBuilder =
BufferBuilder<<TimestampMillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for timestamp type of microsecond unit.
pub type TimestampMicrosecondBufferBuilder =
BufferBuilder<<TimestampMicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for timestamp type of nanosecond unit.
pub type TimestampNanosecondBufferBuilder =
BufferBuilder<<TimestampNanosecondType as ArrowPrimitiveType>::Native>;

/// Buffer builder for 32-bit date type.
pub type Date32BufferBuilder = BufferBuilder<<Date32Type as ArrowPrimitiveType>::Native>;
/// Buffer builder for 64-bit date type.
pub type Date64BufferBuilder = BufferBuilder<<Date64Type as ArrowPrimitiveType>::Native>;

/// Buffer builder for 32-bit elapsed time since midnight of second unit.
pub type Time32SecondBufferBuilder =
BufferBuilder<<Time32SecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 32-bit elapsed time since midnight of millisecond unit.
pub type Time32MillisecondBufferBuilder =
BufferBuilder<<Time32MillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 64-bit elapsed time since midnight of microsecond unit.
pub type Time64MicrosecondBufferBuilder =
BufferBuilder<<Time64MicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for 64-bit elapsed time since midnight of nanosecond unit.
pub type Time64NanosecondBufferBuilder =
BufferBuilder<<Time64NanosecondType as ArrowPrimitiveType>::Native>;

/// Buffer builder for “calendar” interval in months.
pub type IntervalYearMonthBufferBuilder =
BufferBuilder<<IntervalYearMonthType as ArrowPrimitiveType>::Native>;
/// Buffer builder for “calendar” interval in days and milliseconds.
pub type IntervalDayTimeBufferBuilder =
BufferBuilder<<IntervalDayTimeType as ArrowPrimitiveType>::Native>;
/// Buffer builder for “calendar” interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoBufferBuilder =
BufferBuilder<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native>;

/// Buffer builder for elapsed time of second unit.
pub type DurationSecondBufferBuilder =
BufferBuilder<<DurationSecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time of millisecond unit.
pub type DurationMillisecondBufferBuilder =
BufferBuilder<<DurationMillisecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time of microsecond unit.
pub type DurationMicrosecondBufferBuilder =
BufferBuilder<<DurationMicrosecondType as ArrowPrimitiveType>::Native>;
/// Buffer builder for elapsed time of nanosecond unit.
pub type DurationNanosecondBufferBuilder =
BufferBuilder<<DurationNanosecondType as ArrowPrimitiveType>::Native>;

Expand Down Expand Up @@ -124,6 +155,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
}
}

/// Creates a new builder from a [`MutableBuffer`]
pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
let buffer_len = buffer.len();
Self {
Expand Down
16 changes: 16 additions & 0 deletions arrow-array/src/builder/fixed_size_binary_builder.rs
Expand Up @@ -24,6 +24,22 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

/// A fixed size binary array builder
/// ```
/// use arrow_array::builder::FixedSizeBinaryBuilder;
/// use arrow_array::Array;
///
/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
/// // [b"hello", null, b"arrow"]
/// builder.append_value(b"hello").unwrap();
/// builder.append_null();
/// builder.append_value(b"arrow").unwrap();
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), b"hello");
/// assert!(array.is_null(1));
/// assert_eq!(array.value(2), b"arrow");
/// ```
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
values_builder: UInt8BufferBuilder,
Expand Down

0 comments on commit 2460c7b

Please sign in to comment.