Skip to content

Commit

Permalink
Add Row size methods (apache#3160)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Nov 22, 2022
1 parent a110004 commit 51bcc9a
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
2 changes: 2 additions & 0 deletions arrow/src/lib.rs
Expand Up @@ -297,6 +297,8 @@
#![deny(clippy::redundant_clone)]
#![warn(missing_debug_implementations)]
#![allow(rustdoc::invalid_html_tags)]
extern crate core;

pub use arrow_array::{downcast_dictionary_array, downcast_primitive_array};

pub use arrow_buffer::{alloc, buffer};
Expand Down
21 changes: 21 additions & 0 deletions arrow/src/row/interner.rs
Expand Up @@ -157,6 +157,15 @@ impl OrderPreservingInterner {
/// Returns the bytes stored in the `values` buffer for the interned `key`.
pub fn value(&self, key: Interned) -> &[u8] {
    &self.values[key]
}

/// Returns the size of this instance in bytes including self
///
/// The total is the inline size of `Self` plus the heap storage reported by
/// the `keys` and `values` buffers, the `bucket` structure, and the capacity
/// reserved by `lookup` (scaled by the size of an `Interned`).
pub fn size(&self) -> usize {
    std::mem::size_of::<Self>()
        // `keys` and `values` are separate buffers: each is counted exactly
        // once. Adding `values` twice would over-count one and omit the other.
        + self.keys.buffer_size()
        + self.values.buffer_size()
        + self.bucket.size()
        + self.lookup.capacity() * std::mem::size_of::<Interned>()
}
}

/// A buffer of `[u8]` indexed by [`Interned`]
Expand Down Expand Up @@ -192,6 +201,11 @@ impl InternBuffer {
self.offsets.push(self.values.len());
key
}

/// Returns the byte size of the buffers backing this instance.
///
/// Computed from reserved capacity rather than length: the capacity of
/// `values` is added as-is, while the capacity of `offsets` is scaled by
/// the size of a `usize`.
fn buffer_size(&self) -> usize {
    let value_bytes = self.values.capacity();
    let offset_bytes = self.offsets.capacity() * std::mem::size_of::<usize>();
    value_bytes + offset_bytes
}
}

impl Index<Interned> for InternBuffer {
Expand Down Expand Up @@ -324,6 +338,13 @@ impl Bucket {
}
}
}

/// Returns the size of this instance in bytes
///
/// Sums the inline size of `Self`, the capacity reserved by `slots`
/// (scaled by the size of a `Slot`), and, recursively, the size of the
/// `next` bucket when one is present.
fn size(&self) -> usize {
    let inline_bytes = std::mem::size_of::<Self>();
    let slot_bytes = self.slots.capacity() * std::mem::size_of::<Slot>();
    // A missing `next` contributes nothing.
    let next_bytes = self.next.as_ref().map_or(0, |bucket| bucket.size());
    inline_bytes + slot_bytes + next_bytes
}
}

#[cfg(test)]
Expand Down
34 changes: 34 additions & 0 deletions arrow/src/row/mod.rs
Expand Up @@ -128,6 +128,7 @@
//! [compared]: PartialOrd
//! [compare]: PartialOrd

use core::slice::SlicePattern;
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
Expand Down Expand Up @@ -358,6 +359,14 @@ impl SortField {
/// Creates an instance from the given `data_type` and sort `options`.
pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
    Self { data_type, options }
}

/// Return size of this instance in bytes.
///
/// Includes the size of `Self`.
pub fn size(&self) -> usize {
    let data_type_bytes = self.data_type.size();
    let inline_bytes = std::mem::size_of::<Self>();
    // The `DataType` is stored inline in `Self`; presumably `DataType::size`
    // already accounts for it, so its inline size is subtracted once to avoid
    // counting it twice — TODO confirm against `DataType::size`.
    data_type_bytes + inline_bytes - std::mem::size_of::<DataType>()
}
}

impl RowConverter {
Expand Down Expand Up @@ -480,6 +489,22 @@ impl RowConverter {
})
.collect()
}

/// Returns the size of this instance in bytes
///
/// Includes the size of `Self`.
///
/// The total is made up of:
/// - the inline size of `Self` (which already covers the `interners`
///   container field itself, so it is not added a second time),
/// - the size reported by every entry of `fields`,
/// - the slots reserved by `interners` (capacity, not length),
/// - the size reported by every interner that is present.
pub fn size(&self) -> usize {
    let field_bytes = self.fields.iter().map(|field| field.size()).sum::<usize>();

    // Reserved slots, whether or not they currently hold an interner.
    let slot_bytes = self.interners.capacity()
        * std::mem::size_of::<Option<Box<OrderPreservingInterner>>>();

    // Only populated slots own an interner allocation.
    let interner_bytes = self
        .interners
        .iter()
        .flatten()
        .map(|interner| interner.size())
        .sum::<usize>();

    std::mem::size_of::<Self>() + field_bytes + slot_bytes + interner_bytes
}
}

/// A row-oriented representation of arrow data that is normalized for comparison.
Expand Down Expand Up @@ -512,6 +537,15 @@ impl Rows {
/// Returns an iterator over this instance, delegating to the
/// `IntoIterator` implementation for a shared reference.
pub fn iter(&self) -> RowsIter<'_> {
    IntoIterator::into_iter(self)
}

/// Returns the size of this instance in bytes
///
/// Includes the size of `Self`.
///
/// The `buffer` and `offsets` contributions are measured from their slice
/// views, i.e. from the elements currently held.
pub fn size(&self) -> usize {
    let buffer_bytes = std::mem::size_of_val(self.buffer.as_slice());
    let offset_bytes = std::mem::size_of_val(self.offsets.as_slice());
    std::mem::size_of::<Self>() + buffer_bytes + offset_bytes
}
}

impl<'a> IntoIterator for &'a Rows {
Expand Down

0 comments on commit 51bcc9a

Please sign in to comment.