From ffca863d80fd10f47e002fa449551d6fef4e4018 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 7 Nov 2021 09:23:06 +0100 Subject: [PATCH 1/4] fix growable negative keys --- src/array/growable/dictionary.rs | 2 +- tests/it/array/growable/dictionary.rs | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/array/growable/dictionary.rs b/src/array/growable/dictionary.rs index 4be93a53a2b..f21642c6a04 100644 --- a/src/array/growable/dictionary.rs +++ b/src/array/growable/dictionary.rs @@ -100,7 +100,7 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { self.key_values.extend( values .iter() - .map(|x| T::from_usize(offset + x.to_usize().unwrap()).unwrap()), + .map(|x| T::from_usize(offset + x.to_usize().unwrap_or(0)).unwrap()), ); } diff --git a/tests/it/array/growable/dictionary.rs b/tests/it/array/growable/dictionary.rs index 61bacafd9be..55d713e7d42 100644 --- a/tests/it/array/growable/dictionary.rs +++ b/tests/it/array/growable/dictionary.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use arrow2::array::growable::{Growable, GrowableDictionary}; use arrow2::array::*; +use arrow2::datatypes::DataType; use arrow2::error::Result; #[test] @@ -29,6 +30,22 @@ fn test_single() -> Result<()> { Ok(()) } +#[test] +fn test_negative_keys() { + let vals = vec![Some("a"), Some("b"), Some("c")]; + let keys = vec![0, 1, 2, -1]; + + let keys = PrimitiveArray::from_data( + DataType::Int32, + keys.into(), + Some(vec![true, true, true, false].into()), + ); + + let arr = DictionaryArray::from_data(keys, Arc::new(Utf8Array::::from(vals))); + // check that we don't panic with negative keys to usize conversion + arrow2::compute::concat::concatenate(&[&arr]).unwrap(); +} + #[test] fn test_multi() -> Result<()> { let mut original_data1 = vec![Some("a"), Some("b"), None, Some("a")]; From 695278f892fd8817e08b35cd6f0a94de67a410d6 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 7 Nov 2021 09:31:20 +0100 Subject: [PATCH 2/4] update dictionary scalar; keys may be null --- src/array/dictionary/mod.rs | 9 +++++++-- tests/it/array/growable/dictionary.rs | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 012d937d6d9..a41b05614f1 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -14,6 +14,7 @@ pub use iterator::*; pub use mutable::*; use super::{new_empty_array, primitive::PrimitiveArray, Array}; +use crate::scalar::NullScalar; /// Trait denoting [`NativeType`]s that can be used as keys of a dictionary. pub trait DictionaryKey: @@ -127,8 +128,12 @@ impl DictionaryArray { /// Returns the value of the [`DictionaryArray`] at position `i`. #[inline] pub fn value(&self, index: usize) -> Box { - let index = self.keys.value(index).to_usize().unwrap(); - new_scalar(self.values.as_ref(), index) + if self.keys.is_null(index) { + Box::new(NullScalar::new()) + } else { + let index = self.keys.value(index).to_usize().unwrap(); + new_scalar(self.values.as_ref(), index) + } } } diff --git a/tests/it/array/growable/dictionary.rs b/tests/it/array/growable/dictionary.rs index 55d713e7d42..141df18626c 100644 --- a/tests/it/array/growable/dictionary.rs +++ b/tests/it/array/growable/dictionary.rs @@ -43,7 +43,9 @@ fn test_negative_keys() { let arr = DictionaryArray::from_data(keys, Arc::new(Utf8Array::::from(vals))); // check that we don't panic with negative keys to usize conversion - arrow2::compute::concat::concatenate(&[&arr]).unwrap(); + let out = arrow2::compute::concat::concatenate(&[&arr]).unwrap(); + let out = out.as_any().downcast_ref::>().unwrap(); + assert_eq!(out, &arr); } #[test] From 316c26c8b73cf8100f9ff0db6345fb1e63d1edcc Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 7 Nov 2021 12:21:19 +0100 Subject: [PATCH 3/4] Update src/array/growable/dictionary.rs Co-authored-by: Jorge Leitao --- src/array/growable/dictionary.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/array/growable/dictionary.rs b/src/array/growable/dictionary.rs index f21642c6a04..7fe1040cea4 100644 --- a/src/array/growable/dictionary.rs +++ b/src/array/growable/dictionary.rs @@ -100,6 +100,7 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { self.key_values.extend( values .iter() + // `.unwrap_or(0)` because this operation does not check for null values, which may contain any key. .map(|x| T::from_usize(offset + x.to_usize().unwrap_or(0)).unwrap()), ); } From 0517b952247e9e88f41ca0cfecbc29fe9298476d Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 7 Nov 2021 15:51:05 +0100 Subject: [PATCH 4/4] use growable api for test --- tests/it/array/growable/dictionary.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/it/array/growable/dictionary.rs b/tests/it/array/growable/dictionary.rs index 141df18626c..8c27fc50070 100644 --- a/tests/it/array/growable/dictionary.rs +++ b/tests/it/array/growable/dictionary.rs @@ -43,9 +43,10 @@ fn test_negative_keys() { let arr = DictionaryArray::from_data(keys, Arc::new(Utf8Array::::from(vals))); // check that we don't panic with negative keys to usize conversion - let out = arrow2::compute::concat::concatenate(&[&arr]).unwrap(); - let out = out.as_any().downcast_ref::>().unwrap(); - assert_eq!(out, &arr); + let mut growable = GrowableDictionary::new(&[&arr], false, 0); + growable.extend(0, 0, 4); + let out: DictionaryArray = growable.into(); + assert_eq!(out, arr); } #[test]