From 330c8ee763c5364b92eae0123709c6fddb26f1b6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 13 Jul 2022 18:31:27 -0700 Subject: [PATCH 1/2] Optimize filter_dict --- arrow/src/array/array_dictionary.rs | 22 ++++++++++++++++++++++ arrow/src/compute/kernels/filter.rs | 8 +++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 8a7e05aac33..1a6ae66a646 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -151,6 +151,28 @@ impl DictionaryArray { Ok(array.into()) } + /// Create a new DictionaryArray directly from specified keys + /// (indexes into the dictionary) and values (dictionary) + /// array, and the corresponding ArrayData. This is used internally + /// for the usage like filter kernel. + /// + /// # Safety + /// + /// The input keys, values and data must form a valid DictionaryArray, + /// or undefined behavior can results. + pub(crate) unsafe fn try_new_unchecked( + keys: PrimitiveArray, + values: ArrayRef, + data: ArrayData, + ) -> Self { + Self { + data, + keys, + values, + is_ordered: false, + } + } + /// Return an array view of the keys of this dictionary as a PrimitiveArray. pub fn keys(&self) -> &PrimitiveArray { &self.keys diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index 1af93bff5ad..7b88de7b8e8 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -786,7 +786,13 @@ where ) }; - DictionaryArray::::from(data) + unsafe { + DictionaryArray::::try_new_unchecked( + filtered_keys, + array.values().clone(), + data, + ) + } } #[cfg(test)] From 3983af9a48d610af39cdcfb34d351050488ef897 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 13 Jul 2022 20:34:03 -0700 Subject: [PATCH 2/2] For review --- arrow/src/array/array_dictionary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 1a6ae66a646..9350daae53e 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -159,7 +159,7 @@ impl DictionaryArray { /// # Safety /// /// The input keys, values and data must form a valid DictionaryArray, - /// or undefined behavior can results. + /// or undefined behavior can occur. pub(crate) unsafe fn try_new_unchecked( keys: PrimitiveArray, values: ArrayRef,