diff --git a/src/array/dictionary/iterator.rs b/src/array/dictionary/iterator.rs index 502af050f4e..7e33e67c9a8 100644 --- a/src/array/dictionary/iterator.rs +++ b/src/array/dictionary/iterator.rs @@ -1,59 +1,29 @@ +use crate::array::{ArrayAccessor, ArrayValuesIter}; use crate::bitmap::utils::{BitmapIter, ZipValidity}; use crate::scalar::Scalar; -use crate::trusted_len::TrustedLen; use super::{DictionaryArray, DictionaryKey}; -/// Iterator of values of an `ListArray`. -pub struct DictionaryValuesIter<'a, K: DictionaryKey> { - array: &'a DictionaryArray, - index: usize, - end: usize, -} - -impl<'a, K: DictionaryKey> DictionaryValuesIter<'a, K> { - #[inline] - pub fn new(array: &'a DictionaryArray) -> Self { - Self { - array, - index: 0, - end: array.len(), - } - } -} - -impl<'a, K: DictionaryKey> Iterator for DictionaryValuesIter<'a, K> { +unsafe impl<'a, K> ArrayAccessor<'a> for DictionaryArray +where + K: DictionaryKey, +{ type Item = Box; #[inline] - fn next(&mut self) -> Option { - if self.index == self.end { - return None; - } - let old = self.index; - self.index += 1; - Some(self.array.value(old)) + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + // safety: invariant of the trait + self.value_unchecked(index) } #[inline] - fn size_hint(&self) -> (usize, Option) { - (self.end - self.index, Some(self.end - self.index)) + fn len(&self) -> usize { + self.keys.len() } } -unsafe impl<'a, K: DictionaryKey> TrustedLen for DictionaryValuesIter<'a, K> {} - -impl<'a, K: DictionaryKey> DoubleEndedIterator for DictionaryValuesIter<'a, K> { - #[inline] - fn next_back(&mut self) -> Option { - if self.index == self.end { - None - } else { - self.end -= 1; - Some(self.array.value(self.end)) - } - } -} +/// Iterator of values of a [`DictionaryArray`]. +pub type DictionaryValuesIter<'a, K> = ArrayValuesIter<'a, DictionaryArray>; type ValuesIter<'a, K> = DictionaryValuesIter<'a, K>; type ZipIter<'a, K> = ZipValidity, ValuesIter<'a, K>, BitmapIter<'a>>; diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 02d72ccd260..e037d4c42f7 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -317,9 +317,23 @@ impl DictionaryArray { /// # Panic /// This function panics iff `index >= self.len()` #[inline] - pub fn value(&self, index: usize) -> Box { + pub fn value(&self, i: usize) -> Box { + assert!(i < self.len()); + unsafe { self.value_unchecked(i) } + } + + /// Returns the value of the [`DictionaryArray`] at position `i`. + /// # Implementation + /// This function will allocate a new [`Scalar`] and is usually not performant. + /// Consider calling `keys` and `values`, downcasting `values`, and iterating over that. + /// # Safety + /// This function is safe iff `index < self.len()` + #[inline] + pub unsafe fn value_unchecked(&self, index: usize) -> Box { + // safety - invariant of this function + let index = unsafe { self.keys.value_unchecked(index) }; // safety - invariant of this struct - let index = unsafe { self.keys.value(index).as_usize() }; + let index = unsafe { index.as_usize() }; new_scalar(self.values.as_ref(), index) } diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index 665d6fc9234..5edb436a381 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -3,6 +3,7 @@ use std::{collections::hash_map::DefaultHasher, sync::Arc}; use hash_hasher::HashedMap; +use crate::array::ArrayAccessor; use crate::{ array::{primitive::MutablePrimitiveArray, Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, @@ -109,8 +110,14 @@ impl MutableDictionaryArray { } /// pushes a null value + #[inline] pub fn push_null(&mut self) { - self.keys.push(None) + if self.values.is_empty() { + // keys's default value is 0. If self.values is empty, the 0th index + // would be out of bound. + self.values.push_null() + } + self.keys.push(None); } /// returns a mutable reference to the inner values. @@ -198,8 +205,9 @@ impl MutableArray for MutableDictio self } + #[inline] fn push_null(&mut self) { - self.keys.push(None) + self.push_null() } fn reserve(&mut self, additional: usize) { @@ -249,3 +257,26 @@ where } } } + +unsafe impl<'a, K, M, T: 'a> ArrayAccessor<'a> for MutableDictionaryArray +where + K: DictionaryKey, + M: MutableArray + ArrayAccessor<'a, Item = T>, +{ + type Item = T; + + #[inline] + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + // safety: invariant of the trait + let index = self.keys.value_unchecked(index); + // safety: invariant of the struct + let index = index.as_usize(); + // safety: invariant of the struct + self.values.value_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.keys.len() + } +} diff --git a/src/array/iterator.rs b/src/array/iterator.rs index 5e8ed44d861..7dbcc1bb89f 100644 --- a/src/array/iterator.rs +++ b/src/array/iterator.rs @@ -6,7 +6,7 @@ mod private { impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {} } -/// Sealed trait representing assess to a value of an array. +/// Sealed trait representing random access to a value of an array. /// # Safety /// Implementers of this trait guarantee that /// `value_unchecked` is safe when called up to `len` diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index feb5a5df93b..4c80ed3d8c9 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -264,14 +264,8 @@ impl ListArray { /// Returns the element at index `i` #[inline] pub fn value(&self, i: usize) -> Box { - let offset = self.offsets[i]; - let offset_1 = self.offsets[i + 1]; - let length = (offset_1 - offset).to_usize(); - - // Safety: - // One of the invariants of the struct - // is that offsets are in bounds - unsafe { self.values.slice_unchecked(offset.to_usize(), length) } + assert!(i < self.len()); + unsafe { self.value_unchecked(i) } } /// Returns the element at index `i` as &str diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index c71c41c164e..63ab6d2a775 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -1,7 +1,7 @@ use std::{iter::FromIterator, sync::Arc}; use crate::array::physical_binary::extend_validity; -use crate::array::TryExtendFromSelf; +use crate::array::{ArrayAccessor, TryExtendFromSelf}; use crate::bitmap::Bitmap; use crate::{ array::{Array, MutableArray, TryExtend, TryPush}, @@ -288,6 +288,11 @@ impl MutablePrimitiveArray { pub fn capacity(&self) -> usize { self.values.capacity() } + + /// Returns the capacity of this [`MutablePrimitiveArray`]. + pub fn len(&self) -> usize { + self.values.len() + } } /// Accessors @@ -667,3 +672,17 @@ impl TryExtendFromSelf for MutablePrimitiveArray { Ok(()) } } + +unsafe impl<'a, T: NativeType> ArrayAccessor<'a> for MutablePrimitiveArray { + type Item = T; + + #[inline] + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + *self.values.get_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } +} diff --git a/tests/it/array/dictionary/mutable.rs b/tests/it/array/dictionary/mutable.rs index 24b40f0a6f3..64fb5077ce6 100644 --- a/tests/it/array/dictionary/mutable.rs +++ b/tests/it/array/dictionary/mutable.rs @@ -74,3 +74,11 @@ fn push_utf8() { .collect::>(); assert_eq!(*new.map(), expected_map); } + +#[test] +fn iter() { + let values = Utf8Array::::from_slice(&["a", "aa"]); + let array = + DictionaryArray::try_from_keys(PrimitiveArray::from_vec(vec![1, 0]), values.boxed()) + .unwrap(); +}