diff --git a/src/array/fixed_size_list/mutable.rs b/src/array/fixed_size_list/mutable.rs index 7409aaca66e..5f89920a292 100644 --- a/src/array/fixed_size_list/mutable.rs +++ b/src/array/fixed_size_list/mutable.rs @@ -1,13 +1,10 @@ use std::sync::Arc; use crate::{ - array::{ - Array, MutableArray, MutableBinaryArray, MutablePrimitiveArray, MutableUtf8Array, Offset, - }, + array::{Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, datatypes::DataType, error::{ArrowError, Result}, - types::NativeType, }; use super::FixedSizeListArray; @@ -32,6 +29,7 @@ impl From> for FixedSizeListArray } impl MutableFixedSizeListArray { + /// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size. pub fn new(values: M, size: usize) -> Self { let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size); assert_eq!(values.len(), 0); @@ -43,20 +41,42 @@ impl MutableFixedSizeListArray { } } - pub fn mut_values(&mut self) -> &mut M { + fn mut_values(&mut self) -> &mut M { &mut self.values } + /// The inner values pub fn values(&self) -> &M { &self.values } fn init_validity(&mut self) { - self.validity = Some(MutableBitmap::from_trusted_len_iter( - std::iter::repeat(true) - .take(self.values.len() - 1) - .chain(std::iter::once(false)), - )) + let len = self.values.len() / self.size; + + let mut validity = MutableBitmap::new(); + validity.extend_constant(len, true); + validity.set(len - 1, false); + self.validity = Some(validity) + } + + #[inline] + fn try_push_valid(&mut self) -> Result<()> { + if self.values.len() % self.size != 0 { + return Err(ArrowError::KeyOverflowError); + }; + if let Some(validity) = &mut self.validity { + validity.push(true) + } + Ok(()) + } + + #[inline] + fn push_null(&mut self) { + (0..self.size).for_each(|_| self.values.push_null()); + match &mut self.validity { + Some(validity) => validity.push(false), + None => self.init_validity(), + } } } @@ -97,6 +117,7 @@ impl MutableArray for MutableFixedSizeListArray { self } + #[inline] fn push_null(&mut self) { (0..self.size).for_each(|_| { self.values.push_null(); @@ -109,69 +130,33 @@ impl MutableArray for MutableFixedSizeListArray { } } -impl MutableFixedSizeListArray> { - pub fn try_from_iter>, I: IntoIterator>>( - iter: I, - size: usize, - data_type: DataType, - ) -> Result { - let iterator = iter.into_iter(); - let (lower, _) = iterator.size_hint(); - let array = MutablePrimitiveArray::::with_capacity_from(lower * size, data_type); - let mut array = MutableFixedSizeListArray::new(array, size); - for items in iterator { - if let Some(items) = items { - let values = array.mut_values(); - let len = values.len(); - values.extend(items); - if values.len() - len != size { - return Err(ArrowError::InvalidArgumentError( - "A FixedSizeList must have all its values with the same size".to_string(), - )); - }; - } else { - array.push_null(); - } +impl TryExtend> for MutableFixedSizeListArray +where + M: MutableArray + TryExtend>, + I: IntoIterator>, +{ + #[inline] + fn try_extend>>(&mut self, iter: II) -> Result<()> { + for items in iter { + self.try_push(items)?; } - Ok(array) + Ok(()) } } -macro_rules! impl_offsets { - ($mutable:ident, $type:ty) => { - impl MutableFixedSizeListArray<$mutable> { - pub fn try_from_iter< - T: AsRef<$type>, - P: IntoIterator>, - I: IntoIterator>, - >( - iter: I, - size: usize, - ) -> Result { - let iterator = iter.into_iter(); - let (lower, _) = iterator.size_hint(); - let array = $mutable::::with_capacity(lower * size); - let mut array = MutableFixedSizeListArray::new(array, size); - for items in iterator { - if let Some(items) = items { - let values = array.mut_values(); - let len = values.len(); - values.extend(items); - if values.len() - len != size { - return Err(ArrowError::InvalidArgumentError( - "A FixedSizeList must have all its values with the same size" - .to_string(), - )); - }; - } else { - array.push_null(); - } - } - Ok(array) - } +impl TryPush> for MutableFixedSizeListArray +where + M: MutableArray + TryExtend>, + I: IntoIterator>, +{ + #[inline] + fn try_push(&mut self, item: Option) -> Result<()> { + if let Some(items) = item { + self.mut_values().try_extend(items)?; + self.try_push_valid()?; + } else { + self.push_null(); } - }; + Ok(()) + } } - -impl_offsets!(MutableUtf8Array, str); -impl_offsets!(MutableBinaryArray, [u8]); diff --git a/src/array/growable/fixed_size_list.rs b/src/array/growable/fixed_size_list.rs new file mode 100644 index 00000000000..cabd0b75928 --- /dev/null +++ b/src/array/growable/fixed_size_list.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use crate::{ + array::{Array, FixedSizeListArray}, + bitmap::MutableBitmap, + datatypes::DataType, +}; + +use super::{ + make_growable, + utils::{build_extend_null_bits, ExtendNullBits}, + Growable, +}; + +/// Concrete [`Growable`] for the [`FixedSizeListArray`]. +pub struct GrowableFixedSizeList<'a> { + arrays: Vec<&'a FixedSizeListArray>, + validity: MutableBitmap, + values: Box + 'a>, + extend_null_bits: Vec>, + size: usize, +} + +impl<'a> GrowableFixedSizeList<'a> { + /// Creates a new [`GrowableList`] bound to `arrays` with a pre-allocated `capacity`. + /// # Panics + /// If `arrays` is empty. + pub fn new( + arrays: Vec<&'a FixedSizeListArray>, + mut use_validity: bool, + capacity: usize, + ) -> Self { + assert!(!arrays.is_empty()); + + // if any of the arrays has nulls, insertions from any array requires setting bits + // as there is at least one array with nulls. + if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { + use_validity = true; + }; + + let size = + if let DataType::FixedSizeList(_, size) = &arrays[0].data_type().to_logical_type() { + *size as usize + } else { + unreachable!("`GrowableFixedSizeList` expects `DataType::FixedSizeList`") + }; + + let extend_null_bits = arrays + .iter() + .map(|array| build_extend_null_bits(*array, use_validity)) + .collect(); + + let inner = arrays + .iter() + .map(|array| array.values().as_ref()) + .collect::>(); + let values = make_growable(&inner, use_validity, 0); + + Self { + arrays, + values, + validity: MutableBitmap::with_capacity(capacity), + extend_null_bits, + size, + } + } + + fn to(&mut self) -> FixedSizeListArray { + let validity = std::mem::take(&mut self.validity); + let values = self.values.as_arc(); + + FixedSizeListArray::from_data(self.arrays[0].data_type().clone(), values, validity.into()) + } +} + +impl<'a> Growable<'a> for GrowableFixedSizeList<'a> { + fn extend(&mut self, index: usize, start: usize, len: usize) { + (self.extend_null_bits[index])(&mut self.validity, start, len); + self.values + .extend(index, start * self.size, len * self.size); + } + + fn extend_validity(&mut self, additional: usize) { + self.values.extend_validity(additional * self.size); + self.validity.extend_constant(additional, false); + } + + fn as_arc(&mut self) -> Arc { + Arc::new(self.to()) + } + + fn as_box(&mut self) -> Box { + Box::new(self.to()) + } +} + +impl<'a> From> for FixedSizeListArray { + fn from(val: GrowableFixedSizeList<'a>) -> Self { + let mut values = val.values; + let values = values.as_arc(); + + Self::from_data( + val.arrays[0].data_type().clone(), + values, + val.validity.into(), + ) + } +} diff --git a/src/array/growable/mod.rs b/src/array/growable/mod.rs index f9c3bf6ce25..83815e936e1 100644 --- a/src/array/growable/mod.rs +++ b/src/array/growable/mod.rs @@ -18,6 +18,8 @@ mod list; pub use list::GrowableList; mod structure; pub use structure::GrowableStruct; +mod fixed_size_list; +pub use fixed_size_list::GrowableFixedSizeList; mod utf8; pub use utf8::GrowableUtf8; mod dictionary; @@ -201,7 +203,17 @@ pub fn make_growable<'a>( capacity, )) } - FixedSizeList => todo!(), + FixedSizeList => { + let arrays = arrays + .iter() + .map(|array| array.as_any().downcast_ref().unwrap()) + .collect::>(); + Box::new(fixed_size_list::GrowableFixedSizeList::new( + arrays, + use_validity, + capacity, + )) + } Union => todo!(), Dictionary(key_type) => { with_match_physical_dictionary_key_type!(key_type, |$T| { diff --git a/tests/it/array/equal/fixed_size_list.rs b/tests/it/array/equal/fixed_size_list.rs index 9e26f7523e3..71ffb42aa8a 100644 --- a/tests/it/array/equal/fixed_size_list.rs +++ b/tests/it/array/equal/fixed_size_list.rs @@ -1,5 +1,5 @@ use arrow2::{ - array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray}, + array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray, TryExtend}, datatypes::DataType, }; @@ -16,9 +16,9 @@ fn create_fixed_size_list_array, T: AsRef<[Option]>>( }) }); - MutableFixedSizeListArray::>::try_from_iter(data, 3, DataType::Int32) - .unwrap() - .into() + let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::::new(), 3); + list.try_extend(data).unwrap(); + list.into() } #[test] diff --git a/tests/it/array/fixed_size_list/mutable.rs b/tests/it/array/fixed_size_list/mutable.rs index 31e2f11c164..0ff2ef2f962 100644 --- a/tests/it/array/fixed_size_list/mutable.rs +++ b/tests/it/array/fixed_size_list/mutable.rs @@ -9,14 +9,9 @@ fn primitive() { Some(vec![Some(4), None, Some(6)]), ]; - let list: FixedSizeListArray = - MutableFixedSizeListArray::>::try_from_iter( - data, - 3, - DataType::Int32, - ) - .unwrap() - .into(); + let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::::new(), 3); + list.try_extend(data).unwrap(); + let list: FixedSizeListArray = list.into(); let a = list.value(0); let a = a.as_any().downcast_ref::().unwrap(); diff --git a/tests/it/array/growable/fixed_size_list.rs b/tests/it/array/growable/fixed_size_list.rs new file mode 100644 index 00000000000..abf95fd66b3 --- /dev/null +++ b/tests/it/array/growable/fixed_size_list.rs @@ -0,0 +1,85 @@ +use arrow2::array::{ + growable::{Growable, GrowableFixedSizeList}, + FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray, TryExtend, +}; + +fn create_list_array(data: Vec>>>) -> FixedSizeListArray { + let mut array = MutableFixedSizeListArray::new(MutablePrimitiveArray::::new(), 3); + array.try_extend(data).unwrap(); + array.into() +} + +#[test] +fn basic() { + let data = vec![ + Some(vec![Some(1i32), Some(2), Some(3)]), + Some(vec![Some(4), Some(5), Some(6)]), + Some(vec![Some(7i32), Some(8), Some(9)]), + ]; + + let array = create_list_array(data); + + let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); + a.extend(0, 0, 1); + + let result: FixedSizeListArray = a.into(); + + let expected = vec![Some(vec![Some(1i32), Some(2), Some(3)])]; + let expected = create_list_array(expected); + + assert_eq!(result, expected) +} + +#[test] +fn null_offset() { + let data = vec![ + Some(vec![Some(1i32), Some(2), Some(3)]), + None, + Some(vec![Some(6i32), Some(7), Some(8)]), + ]; + let array = create_list_array(data); + return; + let array = array.slice(1, 2); + + let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); + a.extend(0, 1, 1); + + let result: FixedSizeListArray = a.into(); + + let expected = vec![Some(vec![Some(6i32), Some(7), Some(8)])]; + let expected = create_list_array(expected); + + assert_eq!(result, expected) +} + +#[test] +fn test_from_two_lists() { + let data_1 = vec![ + Some(vec![Some(1i32), Some(2), Some(3)]), + None, + Some(vec![Some(6i32), None, Some(8)]), + ]; + let array_1 = create_list_array(data_1); + + let data_2 = vec![ + Some(vec![Some(8i32), Some(7), Some(6)]), + Some(vec![Some(5i32), None, Some(4)]), + Some(vec![Some(2i32), Some(1), Some(0)]), + ]; + let array_2 = create_list_array(data_2); + + let mut a = GrowableFixedSizeList::new(vec![&array_1, &array_2], false, 6); + a.extend(0, 0, 2); + a.extend(1, 1, 1); + + let result: FixedSizeListArray = a.into(); + + let expected = vec![ + Some(vec![Some(1i32), Some(2), Some(3)]), + None, + Some(vec![Some(5i32), None, Some(4)]), + ]; + let expected = create_list_array(expected); + + assert_eq!(result, expected); +} diff --git a/tests/it/array/growable/mod.rs b/tests/it/array/growable/mod.rs index 48ac9b440e0..fcefae2c5a2 100644 --- a/tests/it/array/growable/mod.rs +++ b/tests/it/array/growable/mod.rs @@ -2,6 +2,7 @@ mod binary; mod boolean; mod dictionary; mod fixed_binary; +mod fixed_size_list; mod list; mod null; mod primitive;