From deb0fa04e2514b55b30e3689308b0070a5f21428 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies
Date: Mon, 13 Jun 2022 11:56:58 +0100
Subject: [PATCH] Add vec-inspired APIs to BufferBuilder (#1850)

---
 Review notes (this section is ignored when the patch is applied):
 - BufferBuilder::truncate now clamps its length so that a `len` larger than
   the current length is a no-op, as documented.
 - Closed the doctest fence on append_n_zeroed (one extra added line).
 - The post-image blob id on the builder.rs `index` line predates these edits.

 arrow/src/array/builder.rs  | 94 ++++++++++++++++++++++++++++++++++++-
 arrow/src/buffer/mutable.rs | 15 ++++++
 2 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 041b7a92c33..ed26d3c2f48 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -180,8 +180,7 @@ impl BufferBuilder {
     /// ```
     #[inline]
     pub fn advance(&mut self, i: usize) {
-        let new_buffer_len = (self.len + i) * mem::size_of::<T>();
-        self.buffer.resize(new_buffer_len, 0);
+        self.buffer.extend_zeros(i * mem::size_of::<T>());
         self.len += i;
     }
 
@@ -244,6 +243,25 @@ impl BufferBuilder {
         self.len += n;
     }
 
+    /// Appends `n`, zero-initialized values
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::UInt32BufferBuilder;
+    ///
+    /// let mut builder = UInt32BufferBuilder::new(10);
+    /// builder.append_n_zeroed(3);
+    ///
+    /// assert_eq!(builder.len(), 3);
+    /// assert_eq!(builder.as_slice(), &[0, 0, 0]);
+    /// ```
+    #[inline]
+    pub fn append_n_zeroed(&mut self, n: usize) {
+        self.buffer.extend_zeros(n * mem::size_of::<T>());
+        self.len += n;
+    }
+
     /// Appends a slice of type `T`, growing the internal buffer as needed.
     ///
     /// # Example:
@@ -262,6 +280,78 @@ impl BufferBuilder {
         self.len += slice.len();
     }
 
+    /// View the contents of this buffer as a slice
+    ///
+    /// ```
+    /// use arrow::array::Float64BufferBuilder;
+    ///
+    /// let mut builder = Float64BufferBuilder::new(10);
+    /// builder.append(1.3);
+    /// builder.append_n(2, 2.3);
+    ///
+    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
+    /// ```
+    #[inline]
+    pub fn as_slice(&self) -> &[T] {
+        // SAFETY
+        //
+        // - MutableBuffer is aligned and initialized for len elements of T
+        // - MutableBuffer corresponds to a single allocation
+        // - MutableBuffer does not support modification whilst active immutable borrows
+        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
+    }
+
+    /// View the contents of this buffer as a mutable slice
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::Float32BufferBuilder;
+    ///
+    /// let mut builder = Float32BufferBuilder::new(10);
+    ///
+    /// builder.append_slice(&[1., 2., 3.4]);
+    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
+    ///
+    /// builder.as_slice_mut()[1] = 4.2;
+    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
+    /// ```
+    #[inline]
+    pub fn as_slice_mut(&mut self) -> &mut [T] {
+        // SAFETY
+        //
+        // - MutableBuffer is aligned and initialized for len elements of T
+        // - MutableBuffer corresponds to a single allocation
+        // - `&mut self` guarantees exclusive access for the lifetime of the returned slice
+        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
+    }
+
+    /// Shorten this BufferBuilder to `len` items
+    ///
+    /// If `len` is greater than the builder's current length, this has no effect
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::UInt16BufferBuilder;
+    ///
+    /// let mut builder = UInt16BufferBuilder::new(10);
+    ///
+    /// builder.append_slice(&[42, 44, 46]);
+    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
+    ///
+    /// builder.truncate(2);
+    /// assert_eq!(builder.as_slice(), &[42, 44]);
+    ///
+    /// builder.append(12);
+    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
+    /// ```
+    #[inline]
+    pub fn truncate(&mut self, len: usize) {
+        self.buffer.truncate(len * mem::size_of::<T>());
+        self.len = self.len.min(len);
+    }
+
     /// # Safety
     /// This requires the iterator be a trusted length. This could instead require
     /// the iterator implement `TrustedLen` once that is stabilized.
diff --git a/arrow/src/buffer/mutable.rs b/arrow/src/buffer/mutable.rs
index 709973b4401..ef3e35209a1 100644
--- a/arrow/src/buffer/mutable.rs
+++ b/arrow/src/buffer/mutable.rs
@@ -30,7 +30,11 @@ use std::ptr::NonNull;
 /// along cache lines and in multiple of 64 bytes.
 /// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
 /// to insert many items, and `into` to convert it to [`Buffer`].
+///
+/// For a safe, strongly typed API consider using [`crate::array::BufferBuilder`]
+///
 /// # Example
+///
 /// ```
 /// # use arrow::buffer::{Buffer, MutableBuffer};
 /// let mut buffer = MutableBuffer::new(0);
@@ -152,6 +156,17 @@ impl MutableBuffer {
         }
     }
 
+    /// Truncates this buffer to `len` bytes
+    ///
+    /// If `len` is greater than the buffer's current length, this has no effect
+    #[inline(always)]
+    pub fn truncate(&mut self, len: usize) {
+        if len > self.len {
+            return;
+        }
+        self.len = len;
+    }
+
     /// Resizes the buffer, either truncating its contents (with no change in capacity), or
     /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
     /// # Example