From ecaa747d44bf86f6f2a0c52a5c87645166906107 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 19 Jun 2022 07:58:42 -0400 Subject: [PATCH 1/4] minor: add a diagram to docstring for DictionaryArray --- arrow/src/array/array_dictionary.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 0fbd5a34eb6..eed1c4bbacc 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -33,6 +33,31 @@ use crate::error::Result; /// This is mostly used to represent strings or a limited set of primitive types as integers, /// for example when doing NLP analysis or representing chromosomes by name. /// +/// [`DictionaryArray`] are represented using a `keys` array and a +/// `values` array, which may be diferent lengths. The `keys` array +/// stores indexes in the `values` array which holding holds +/// the corresponding logical value, as shown here: +/// +/// ```text +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// ┌─────────────────┐ ┌─────────┐ │ ┌─────────────────┐ +///│ │ A │ │ 0 │ │ A │ values[keys[0]] +/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ +///│ │ D │ │ 2 │ │ B │ values[keys[1]] +/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ +///│ │ B │ │ 2 │ │ B │ values[keys[2]] +/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ +///│ │ C │ │ 1 │ │ D │ values[keys[3]] +/// ├─────────────────┤ └─────────┘ │ └─────────────────┘ +///│ │ E │ keys +/// └─────────────────┘ │ +///│ values Logical array +/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ Contents +/// +/// DictionaryArray +/// length = 4 +/// ``` +/// /// Example **with nullable** data: /// /// ``` From ea027eede757b387e50df1f066f965a2d722c514 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 19 Jun 2022 08:08:14 -0400 Subject: [PATCH 2/4] minor: clarify docstring on `DictionaryArray::lookup_key` --- arrow/src/array/array_dictionary.rs | 6 +++++- 1 file changed, 
5 insertions(+), 1 deletion(-) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index eed1c4bbacc..e9a0eab6262 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -153,7 +153,11 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray { &self.keys } - /// Returns the lookup key by doing reverse dictionary lookup + /// If `value` is present in `values` (aka the dictionary), + /// returns the corresponding key (index into the `values` + /// array). Otherwise returns `None`. + /// + /// Panics if `values` is not a [`StringArray`]. pub fn lookup_key(&self, value: &str) -> Option { let rd_buf: &StringArray = self.values.as_any().downcast_ref::().unwrap(); From dfb2f4ce2049ce4a624536c5cefc7348cab6a67b Mon Sep 17 00:00:00 2001 From: Wakahisa Date: Tue, 21 Jun 2022 01:40:47 +0200 Subject: [PATCH 3/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ruihang Xia Co-authored-by: Jörn Horstmann --- arrow/src/array/array_dictionary.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index eed1c4bbacc..4e6db4b2172 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -34,8 +34,8 @@ use crate::error::Result; /// for example when doing NLP analysis or representing chromosomes by name. /// /// [`DictionaryArray`] are represented using a `keys` array and a -/// `values` array, which may be diferent lengths. The `keys` array -/// stores indexes in the `values` array which holding holds +/// `values` array, which may be different lengths. 
The `keys` array +/// stores indexes in the `values` array which holds /// the corresponding logical value, as shown here: /// /// ```text From 8cf0dd3d68d6c7ec0f2d41574b0ac13fa7b20a8e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 21 Jun 2022 07:20:14 -0400 Subject: [PATCH 4/4] make values smaller and keys larger --- arrow/src/array/array_dictionary.rs | 35 ++++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index ac22d6e14f1..b6859a80965 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -40,22 +40,25 @@ use crate::error::Result; /// /// ```text /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -/// ┌─────────────────┐ ┌─────────┐ │ ┌─────────────────┐ -///│ │ A │ │ 0 │ │ A │ values[keys[0]] -/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ -///│ │ D │ │ 2 │ │ B │ values[keys[1]] -/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ -///│ │ B │ │ 2 │ │ B │ values[keys[2]] -/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ -///│ │ C │ │ 1 │ │ D │ values[keys[3]] -/// ├─────────────────┤ └─────────┘ │ └─────────────────┘ -///│ │ E │ keys -/// └─────────────────┘ │ -///│ values Logical array -/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ Contents -/// -/// DictionaryArray -/// length = 4 +/// ┌─────────────────┐ ┌─────────┐ │ ┌─────────────────┐ +/// │ │ A │ │ 0 │ │ A │ values[keys[0]] +/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ +/// │ │ D │ │ 2 │ │ B │ values[keys[1]] +/// ├─────────────────┤ ├─────────┤ │ ├─────────────────┤ +/// │ │ B │ │ 2 │ │ B │ values[keys[2]] +/// └─────────────────┘ ├─────────┤ │ ├─────────────────┤ +/// │ │ 1 │ │ D │ values[keys[3]] +/// ├─────────┤ │ ├─────────────────┤ +/// │ │ 1 │ │ D │ values[keys[4]] +/// ├─────────┤ │ ├─────────────────┤ +/// │ │ 0 │ │ A │ values[keys[5]] +/// └─────────┘ │ └─────────────────┘ +/// │ values keys +/// ─ ─ ─ ─ ─ ─ ─ ─ ─ 
─ ─ ─ ─ ─ ─ ─ ─ ┘ +/// Logical array +/// Contents +/// DictionaryArray +/// length = 6 /// ``` /// /// Example **with nullable** data: