From 226608fefb331cc28b44ab40006d039ac4eb1142 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 22 Nov 2022 16:40:40 +0000 Subject: [PATCH 1/3] Add Row size methods (#3160) --- arrow/src/row/interner.rs | 21 +++++++++++++++++++++ arrow/src/row/mod.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/arrow/src/row/interner.rs b/arrow/src/row/interner.rs index e6c8f097241..63bcc303fd4 100644 --- a/arrow/src/row/interner.rs +++ b/arrow/src/row/interner.rs @@ -157,6 +157,15 @@ impl OrderPreservingInterner { pub fn value(&self, key: Interned) -> &[u8] { self.values.index(key) } + + /// Returns the size of this instance in bytes including self + pub fn size(&self) -> usize { + std::mem::size_of::() + + self.values.buffer_size() + + self.values.buffer_size() + + self.bucket.size() + + self.lookup.capacity() * std::mem::size_of::() + } } /// A buffer of `[u8]` indexed by `[Interned]` @@ -192,6 +201,11 @@ impl InternBuffer { self.offsets.push(self.values.len()); key } + + /// Returns the byte size of the associated buffers + fn buffer_size(&self) -> usize { + self.values.capacity() + self.offsets.capacity() * std::mem::size_of::() + } } impl Index for InternBuffer { @@ -324,6 +338,13 @@ impl Bucket { } } } + + /// Returns the size of this instance in bytes + fn size(&self) -> usize { + std::mem::size_of::() + + self.slots.capacity() * std::mem::size_of::() + + self.next.as_ref().map(|x| x.size()).unwrap_or_default() + } } #[cfg(test)] diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs index 1d0a58d954b..e14eea9d143 100644 --- a/arrow/src/row/mod.rs +++ b/arrow/src/row/mod.rs @@ -358,6 +358,14 @@ impl SortField { pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self { Self { options, data_type } } + + /// Return size of this instance in bytes. + /// + /// Includes the size of `Self`. + pub fn size(&self) -> usize { + self.data_type.size() + std::mem::size_of::() + - std::mem::size_of::() + } } impl RowConverter { @@ -480,6 +488,22 @@ impl RowConverter { }) .collect() } + + /// Returns the size of this instance in bytes + /// + /// Includes the size of `Self`. + pub fn size(&self) -> usize { + std::mem::size_of::() + + std::mem::size_of_val(&self.interners) + + self.fields.iter().map(|x| x.size()).sum::() + + self.interners.capacity() + * std::mem::size_of::>>() + + self + .interners + .iter() + .filter_map(|x| x.as_ref().map(|x| x.size())) + .sum::() + } } /// A row-oriented representation of arrow data, that is normalized for comparison. @@ -512,6 +536,16 @@ impl Rows { pub fn iter(&self) -> RowsIter<'_> { self.into_iter() } + + /// Returns the size of this instance in bytes + /// + /// Includes the size of `Self`. + pub fn size(&self) -> usize { + // Size of fields is accounted for as part of RowConverter + std::mem::size_of::() + + self.buffer.len() + + self.offsets.len() * std::mem::size_of::() + } } impl<'a> IntoIterator for &'a Rows { From 5cbdfab2e40bb90b1540955fdc446e1bcd44248f Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 22 Nov 2022 16:51:19 +0000 Subject: [PATCH 2/3] Fix copypasta --- arrow/src/row/interner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/row/interner.rs b/arrow/src/row/interner.rs index 63bcc303fd4..1c71b6a5521 100644 --- a/arrow/src/row/interner.rs +++ b/arrow/src/row/interner.rs @@ -161,7 +161,7 @@ impl OrderPreservingInterner { /// Returns the size of this instance in bytes including self pub fn size(&self) -> usize { std::mem::size_of::() - + self.values.buffer_size() + + self.keys.buffer_size() + self.values.buffer_size() + self.bucket.size() + self.lookup.capacity() * std::mem::size_of::() From 203505eaab4ea53b469c11fd9027a3ed04594427 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 23 Nov 2022 07:24:22 +0000 Subject: [PATCH 3/3] Fix --- arrow/src/row/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs index e14eea9d143..c57fd41ebc0 100644 --- a/arrow/src/row/mod.rs +++ b/arrow/src/row/mod.rs @@ -494,7 +494,6 @@ impl RowConverter { /// Includes the size of `Self`. pub fn size(&self) -> usize { std::mem::size_of::() - + std::mem::size_of_val(&self.interners) + self.fields.iter().map(|x| x.size()).sum::() + self.interners.capacity() * std::mem::size_of::>>()