Patch: add `size()` memory accounting to the row-format types
(OrderPreservingInterner, InternBuffer, Bucket, SortField, RowConverter, Rows).

Review changes relative to the submitted patch:
  * OrderPreservingInterner::size counted `self.values.buffer_size()` twice and
    never counted `self.keys`; each buffer is now counted exactly once.
  * RowConverter::size no longer adds `size_of_val(&self.interners)`: that is the
    Vec header, which `size_of::<Self>()` already includes.
  * Rows::size measures its buffers through `&self.buffer[..]` / `&self.offsets[..]`
    instead of `.as_slice()`, so the nightly-only `use core::slice::SlicePattern;`
    and the stray `extern crate core;` in lib.rs are no longer added. The lib.rs
    file diff and the first mod.rs hunk are therefore gone, and the new-side
    offsets of the remaining mod.rs hunks are shifted by -1.
  * Generic arguments that had been stripped from the text (`size_of::<Self>()`,
    `sum::<usize>()`, `impl Index<Interned>`, ...) are restored.

Notes: the blob ids on the `index` lines are those of the submitted patch. Line
breaks and indentation were reconstructed from a whitespace-collapsed copy; use
`git apply --ignore-whitespace` if a context line does not match.

diff --git a/arrow/src/row/interner.rs b/arrow/src/row/interner.rs
index e6c8f097241..63bcc303fd4 100644
--- a/arrow/src/row/interner.rs
+++ b/arrow/src/row/interner.rs
@@ -157,6 +157,15 @@ impl OrderPreservingInterner {
     pub fn value(&self, key: Interned) -> &[u8] {
         self.values.index(key)
     }
+
+    /// Returns the size of this instance in bytes including self
+    pub fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + self.keys.buffer_size()
+            + self.values.buffer_size()
+            + self.bucket.size()
+            + self.lookup.capacity() * std::mem::size_of::<Interned>()
+    }
 }
 
 /// A buffer of `[u8]` indexed by `[Interned]`
@@ -192,6 +201,11 @@ impl InternBuffer {
         self.offsets.push(self.values.len());
         key
     }
+
+    /// Returns the byte size of the associated buffers
+    fn buffer_size(&self) -> usize {
+        self.values.capacity() + self.offsets.capacity() * std::mem::size_of::<usize>()
+    }
 }
 
 impl Index<Interned> for InternBuffer {
@@ -324,6 +338,13 @@ impl Bucket {
             }
         }
     }
+
+    /// Returns the size of this instance in bytes
+    fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + self.slots.capacity() * std::mem::size_of::<Slot>()
+            + self.next.as_ref().map(|x| x.size()).unwrap_or_default()
+    }
 }
 
 #[cfg(test)]
diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs
index 1d0a58d954b..1ab229b3dcd 100644
--- a/arrow/src/row/mod.rs
+++ b/arrow/src/row/mod.rs
@@ -358,6 +358,14 @@ impl SortField {
     pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
         Self { options, data_type }
     }
+
+    /// Return size of this instance in bytes.
+    ///
+    /// Includes the size of `Self`.
+    pub fn size(&self) -> usize {
+        self.data_type.size() + std::mem::size_of::<Self>()
+            - std::mem::size_of::<DataType>()
+    }
 }
 
 impl RowConverter {
@@ -480,6 +488,22 @@ impl RowConverter {
             })
             .collect()
     }
+
+    /// Returns the size of this instance in bytes
+    ///
+    /// Includes the size of `Self`.
+    pub fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            // The `interners` Vec header is already part of `size_of::<Self>()`
+            + self.fields.iter().map(|x| x.size()).sum::<usize>()
+            + self.interners.capacity()
+                * std::mem::size_of::<Option<Box<OrderPreservingInterner>>>()
+            + self
+                .interners
+                .iter()
+                .filter_map(|x| x.as_ref().map(|x| x.size()))
+                .sum::<usize>()
+    }
 }
 
 /// A row-oriented representation of arrow data, that is normalized for comparison.
@@ -512,6 +536,15 @@ impl Rows {
     pub fn iter(&self) -> RowsIter<'_> {
         self.into_iter()
     }
+
+    /// Returns the size of this instance in bytes
+    ///
+    /// Includes the size of `Self`.
+    pub fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + std::mem::size_of_val(&self.buffer[..])
+            + std::mem::size_of_val(&self.offsets[..])
+    }
 }
 
 impl<'a> IntoIterator for &'a Rows {