From f837d34c9b2ae250448330762a874990df33e76a Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 6 May 2022 11:15:44 +1000 Subject: [PATCH 1/3] Aligns MutableDictionaryArray's with MutablePrimitiveArrays with TryPush Also removes the ability to build an invalid map. try_push_valid relied on the user pushing a value to the values stack. --- src/array/dictionary/mutable.rs | 78 +++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index c322ed63d49..baa1108d516 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -3,9 +3,8 @@ use std::{collections::hash_map::DefaultHasher, sync::Arc}; use hash_hasher::HashedMap; -use crate::array::TryExtend; use crate::{ - array::{primitive::MutablePrimitiveArray, Array, MutableArray}, + array::{primitive::MutablePrimitiveArray, Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, datatypes::DataType, error::{ArrowError, Result}, @@ -14,6 +13,19 @@ use crate::{ use super::{DictionaryArray, DictionaryKey}; /// A mutable, strong-typed version of [`DictionaryArray`]. +/// +/// # Example +/// Building a UTF8 dictionary with `i32` keys. +/// ``` +/// # use arrow2::array::{MutableDictionaryArray, MutableUtf8Array, TryPush}; +/// # fn main() -> Result<(), Box> { +/// let mut array: MutableDictionaryArray> = MutableDictionaryArray::new(); +/// array.try_push(Some("A"))?; +/// array.try_push(Some("B"))?; +/// array.try_push(None)?; +/// array.try_push(Some("C"))?; +/// # } +/// ``` #[derive(Debug)] pub struct MutableDictionaryArray { data_type: DataType, @@ -68,7 +80,7 @@ impl Default for MutableDictionaryA impl MutableDictionaryArray { /// Returns whether the value should be pushed to the values or not - pub fn try_push_valid(&mut self, value: &T) -> Result { + fn try_push_valid(&mut self, value: &T) -> Result { let mut hasher = DefaultHasher::new(); value.hash(&mut hasher); let hash = hasher.finish(); @@ -181,3 +193,63 @@ where Ok(()) } } + +impl TryPush> for MutableDictionaryArray +where + K: DictionaryKey, + M: MutableArray + TryPush>, + T: Hash, +{ + fn try_push(&mut self, item: Option) -> Result<()> { + if let Some(value) = item { + if self.try_push_valid(&value)? { + self.values.try_push(Some(value)) + } else { + Ok(()) + } + } else { + self.push_null(); + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::{MutableUtf8Array, Utf8Array}; + + #[test] + fn test_push_dictionary() { + let mut new: MutableDictionaryArray> = + MutableDictionaryArray::new(); + + for value in [Some("A"), Some("B"), None, Some("C"), Some("A"), Some("B")] { + new.try_push(value).unwrap(); + } + + let arc = new.values.as_arc(); + let values_array: &Utf8Array = arc.as_any().downcast_ref().unwrap(); + + assert_eq!(*values_array, Utf8Array::::from_slice(["A", "B", "C"])); + + let mut expected_keys = MutablePrimitiveArray::from_slice(&[0, 1]); + expected_keys.push(None); + expected_keys.push(Some(2)); + expected_keys.push(Some(0)); + expected_keys.push(Some(1)); + assert_eq!(new.keys, expected_keys); + + let expected_map = ["A", "B", "C"] + .iter() + .enumerate() + .map(|(index, value)| { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + let hash = hasher.finish(); + (hash, index as i32) + }) + .collect::>(); + assert_eq!(new.map, expected_map); + } +} From 3b9adcb028872abafc8e78fe761cd76ef43df564 Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 6 May 2022 19:14:24 +1000 Subject: [PATCH 2/3] Move tests to /tests/it/array/dictionary on request. Satisfy Miri with doctest --- src/array/dictionary/mutable.rs | 51 ++++++---------------------- tests/it/array/dictionary/mutable.rs | 36 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index baa1108d516..d952e6cd4c5 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -24,6 +24,7 @@ use super::{DictionaryArray, DictionaryKey}; /// array.try_push(Some("B"))?; /// array.try_push(None)?; /// array.try_push(Some("C"))?; +/// # Ok(()) /// # } /// ``` #[derive(Debug)] @@ -130,6 +131,16 @@ impl MutableDictionaryArray { self.values.shrink_to_fit(); self.keys.shrink_to_fit(); } + + /// Returns the dictionary map + pub fn map(&self) -> &HashedMap { + &self.map + } + + /// Returns the dictionary keys + pub fn keys(&self) -> &MutablePrimitiveArray { + &self.keys + } } impl MutableArray for MutableDictionaryArray { @@ -213,43 +224,3 @@ where } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::{MutableUtf8Array, Utf8Array}; - - #[test] - fn test_push_dictionary() { - let mut new: MutableDictionaryArray> = - MutableDictionaryArray::new(); - - for value in [Some("A"), Some("B"), None, Some("C"), Some("A"), Some("B")] { - new.try_push(value).unwrap(); - } - - let arc = new.values.as_arc(); - let values_array: &Utf8Array = arc.as_any().downcast_ref().unwrap(); - - assert_eq!(*values_array, Utf8Array::::from_slice(["A", "B", "C"])); - - let mut expected_keys = MutablePrimitiveArray::from_slice(&[0, 1]); - expected_keys.push(None); - expected_keys.push(Some(2)); - expected_keys.push(Some(0)); - expected_keys.push(Some(1)); - assert_eq!(new.keys, expected_keys); - - let expected_map = ["A", "B", "C"] - .iter() - .enumerate() - .map(|(index, value)| { - let mut hasher = DefaultHasher::new(); - value.hash(&mut hasher); - let hash = hasher.finish(); - (hash, index as i32) - }) - .collect::>(); - assert_eq!(new.map, expected_map); - } -} diff --git a/tests/it/array/dictionary/mutable.rs b/tests/it/array/dictionary/mutable.rs index 924ba90646b..24b40f0a6f3 100644 --- a/tests/it/array/dictionary/mutable.rs +++ b/tests/it/array/dictionary/mutable.rs @@ -1,5 +1,8 @@ use arrow2::array::*; use arrow2::error::Result; +use hash_hasher::HashedMap; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; #[test] fn primitive() -> Result<()> { @@ -38,3 +41,36 @@ fn binary_natural() -> Result<()> { assert_eq!(a.values().len(), 2); Ok(()) } + +#[test] +fn push_utf8() { + let mut new: MutableDictionaryArray> = MutableDictionaryArray::new(); + + for value in [Some("A"), Some("B"), None, Some("C"), Some("A"), Some("B")] { + new.try_push(value).unwrap(); + } + + assert_eq!( + new.values().values(), + MutableUtf8Array::::from_iter_values(["A", "B", "C"].into_iter()).values() + ); + + let mut expected_keys = MutablePrimitiveArray::::from_slice(&[0, 1]); + expected_keys.push(None); + expected_keys.push(Some(2)); + expected_keys.push(Some(0)); + expected_keys.push(Some(1)); + assert_eq!(*new.keys(), expected_keys); + + let expected_map = ["A", "B", "C"] + .iter() + .enumerate() + .map(|(index, value)| { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + let hash = hasher.finish(); + (hash, index as i32) + }) + .collect::>(); + assert_eq!(*new.map(), expected_map); +} From e1f5f65c5efef99641de231f1a9f5f365bed2265 Mon Sep 17 00:00:00 2001 From: Ryan Date: Sat, 7 May 2022 08:48:11 +1000 Subject: [PATCH 3/3] Fix unexpected doctest error --- src/array/dictionary/mutable.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index d952e6cd4c5..a55f6539f50 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -22,7 +22,7 @@ use super::{DictionaryArray, DictionaryKey}; /// let mut array: MutableDictionaryArray> = MutableDictionaryArray::new(); /// array.try_push(Some("A"))?; /// array.try_push(Some("B"))?; -/// array.try_push(None)?; +/// array.push_null(); /// array.try_push(Some("C"))?; /// # Ok(()) /// # }