From e84f00ce5c12c27bee0d53cf46c94b86af55f184 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 5 Dec 2023 16:43:31 +0100 Subject: [PATCH 1/3] feat: add `FixedSizeListArray` --- src/array/boolean.rs | 2 +- src/array/fixed_size_list.rs | 266 ++++++++++++++++++++++++++++++ src/array/fixed_size_primitive.rs | 2 +- src/array/mod.rs | 3 + src/array/null.rs | 4 +- src/array/string.rs | 30 +++- src/array/struct.rs | 2 +- src/array/variable_size_binary.rs | 34 ++++ src/bitmap/mod.rs | 4 +- src/nullable.rs | 14 +- 10 files changed, 345 insertions(+), 16 deletions(-) create mode 100644 src/array/fixed_size_list.rs diff --git a/src/array/boolean.rs b/src/array/boolean.rs index 98d49773..3db0bb88 100644 --- a/src/array/boolean.rs +++ b/src/array/boolean.rs @@ -74,7 +74,7 @@ where Bitmap: FromIterator, { fn from(value: BooleanArray) -> Self { - Self(Nullable::wrap(value.0)) + Self(Nullable::from(value.0)) } } diff --git a/src/array/fixed_size_list.rs b/src/array/fixed_size_list.rs new file mode 100644 index 00000000..aad3c4a0 --- /dev/null +++ b/src/array/fixed_size_list.rs @@ -0,0 +1,266 @@ +//! Array with fixed-size sequences of elements. + +use std::{ + iter, + mem::{self, ManuallyDrop, MaybeUninit}, +}; + +use crate::{ + bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap}, + buffer::{BufferMut, BufferType, VecBuffer}, + nullable::Nullable, + validity::Validity, + Index, Length, +}; + +use super::Array; + +/// Array with fixed-size sequences of elements. +pub struct FixedSizeListArray< + const N: usize, + T: Array, + const NULLABLE: bool = false, + Buffer: BufferType = VecBuffer, +>(>::Storage) +where + T: Validity; + +impl Array + for FixedSizeListArray +where + T: Validity, +{ +} + +impl BitmapRef + for FixedSizeListArray +{ + type Buffer = Buffer; + + fn bitmap_ref(&self) -> &Bitmap { + self.0.bitmap_ref() + } +} + +impl BitmapRefMut + for FixedSizeListArray +{ + fn bitmap_ref_mut(&mut self) -> &mut Bitmap { + self.0.bitmap_ref_mut() + } +} + +impl Default + for FixedSizeListArray +where + T: Validity, + >::Storage: Default, +{ + fn default() -> Self { + Self(Default::default()) + } +} + +impl Extend + for FixedSizeListArray +where + T: Validity, + >::Storage: Extend, +{ + fn extend>(&mut self, iter: I) { + self.0.extend(iter); + } +} + +impl From> + for FixedSizeListArray +where + T: Length, + Bitmap: FromIterator, +{ + fn from(value: FixedSizeListArray) -> Self { + Self(Nullable::from(value.0)) + } +} + +impl FromIterator<[U; N]> + for FixedSizeListArray +where + T: FromIterator, +{ + fn from_iter>(iter: I) -> Self { + Self(iter.into_iter().flatten().collect()) + } +} + +impl FromIterator> + for FixedSizeListArray +where + [U; N]: Default, + T: FromIterator, + ::Buffer: Default + BufferMut + Extend, +{ + fn from_iter>>(iter: I) -> Self { + let mut validity = Bitmap::default(); + let data = iter + .into_iter() + .inspect(|opt| { + validity.extend(iter::once(opt.is_some())); + }) + .flat_map(Option::unwrap_or_default) + .collect(); + Self(Nullable { data, validity }) + } +} + +impl Index for FixedSizeListArray +where + T: Index, +{ + type Item<'a> = [::Item<'a>; N] + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + // Following https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#initializing-an-array-element-by-element + let data = { + let mut data: [MaybeUninit<_>; N] = MaybeUninit::uninit().assume_init(); + let start_index = index * N; + let end_index = start_index + N; + (start_index..end_index) + .enumerate() + .for_each(|(array_index, child_index)| { + data[array_index].write(self.0.index_unchecked(child_index)); + }); + // https://github.com/rust-lang/rust/issues/61956 + mem::transmute_copy(&ManuallyDrop::new(data)) + }; + data + } +} + +impl Index for FixedSizeListArray +where + T: Index, +{ + type Item<'a> = Option<[::Item<'a>; N]> + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.is_valid_unchecked(index).then(|| { + // Following https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#initializing-an-array-element-by-element + let data = { + let mut data: [MaybeUninit<_>; N] = MaybeUninit::uninit().assume_init(); + let start_index = index * N; + let end_index = start_index + N; + (start_index..end_index) + .enumerate() + .for_each(|(array_index, child_index)| { + data[array_index].write(self.0.data.index_unchecked(child_index)); + }); + // https://github.com/rust-lang/rust/issues/61956 + mem::transmute_copy(&ManuallyDrop::new(data)) + }; + data + }) + } +} + +impl Length + for FixedSizeListArray +where + T: Validity, + >::Storage: Length, +{ + fn len(&self) -> usize { + if NULLABLE { + // This uses the length of the validity bitmap + self.0.len() + } else { + self.0.len() / N + } + } +} + +impl ValidityBitmap + for FixedSizeListArray +{ +} + +#[cfg(test)] +mod tests { + use crate::array::{FixedSizePrimitiveArray, StringArray}; + + use super::*; + + #[test] + fn from_iter() { + let input = [[1_u8, 2], [3, 4]]; + let array = input + .into_iter() + .collect::>>(); + assert_eq!(array.len(), 2); + + let input_nullable = [Some([1_u8, 2]), None]; + let array_nullable = + input_nullable + .into_iter() + .collect::, true>>(); + assert_eq!(array_nullable.len(), 2); + + let input_string = [["hello", "world"], ["!", "!"]]; + let array_string = input_string + .into_iter() + .collect::>(); + assert_eq!(array_string.len(), 2); + } + + #[test] + fn index() { + let input = [[1_u8, 2], [3, 4]]; + let array = input + .into_iter() + .collect::>>(); + assert_eq!(array.index(0), Some([&1, &2])); + assert_eq!(array.index(1), Some([&3, &4])); + + let input_string = [["hello", "world"], ["!", "!"]]; + let array_string = input_string + .into_iter() + .collect::>(); + assert_eq!(array_string.index(0), Some(["hello", "world"])); + assert_eq!(array_string.index(1), Some(["!", "!"])); + + let input_nullable_string = [Some(["hello", "world"]), None]; + let array_nullable_string = input_nullable_string + .into_iter() + .collect::>(); + assert_eq!( + array_nullable_string.index(0), + Some(Some(["hello", "world"])) + ); + assert_eq!(array_nullable_string.index(1), Some(None)); + assert_eq!(array_nullable_string.index(2), None); + + let input_nullable_string_nullable = [ + Some([Some("hello"), None]), + None, + Some([None, Some("world")]), + ]; + let array_nullable_string_nullable = input_nullable_string_nullable + .into_iter() + .collect::, true>>( + ); + assert_eq!( + array_nullable_string_nullable.index(0), + Some(Some([Some("hello"), None])) + ); + assert_eq!(array_nullable_string_nullable.index(1), Some(None)); + assert_eq!( + array_nullable_string_nullable.index(2), + Some(Some([None, Some("world")])) + ); + assert_eq!(array_nullable_string_nullable.index(3), None); + } +} diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 6011795f..ff560242 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -81,7 +81,7 @@ where Bitmap: FromIterator, { fn from(value: FixedSizePrimitiveArray) -> Self { - Self(Nullable::wrap(value.0)) + Self(Nullable::from(value.0)) } } diff --git a/src/array/mod.rs b/src/array/mod.rs index a705b5ae..3b260874 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -10,6 +10,9 @@ use std::collections::VecDeque; mod boolean; pub use boolean::*; +mod fixed_size_list; +pub use fixed_size_list::*; + mod fixed_size_primitive; pub use fixed_size_primitive::*; diff --git a/src/array/null.rs b/src/array/null.rs index 52e70a93..0804915e 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -75,7 +75,7 @@ where Bitmap: FromIterator, { fn from(value: NullArray) -> Self { - Self(Nullable::wrap(value.0)) + Self(Nullable::from(value.0)) } } @@ -275,7 +275,7 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "should be < len")] fn index_out_of_bounds() { let array = [(); 1].iter().copied().collect::(); array.index_checked(1); diff --git a/src/array/string.rs b/src/array/string.rs index a8e59820..7a30ec26 100644 --- a/src/array/string.rs +++ b/src/array/string.rs @@ -1,12 +1,14 @@ //! Array with string values. +use std::str; + use super::{Array, VariableSizeBinaryArray}; use crate::{ bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap}, buffer::{BufferType, VecBuffer}, offset::OffsetElement, validity::Validity, - Length, + Index, Length, }; /// Array with string values. @@ -141,6 +143,32 @@ where } } +impl Index + for StringArray +{ + type Item<'a> = &'a str + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + str::from_utf8_unchecked(self.0.index_unchecked(index)) + } +} + +impl Index + for StringArray +{ + type Item<'a> = Option<&'a str> + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.0 + .index_unchecked(index) + .map(|bytes| str::from_utf8_unchecked(bytes)) + } +} + impl Length for StringArray where diff --git a/src/array/struct.rs b/src/array/struct.rs index 47047c69..1eebd939 100644 --- a/src/array/struct.rs +++ b/src/array/struct.rs @@ -51,7 +51,7 @@ where Bitmap: FromIterator, { fn from(value: StructArray) -> Self { - Self(Nullable::wrap(value.0)) + Self(Nullable::from(value.0)) } } diff --git a/src/array/variable_size_binary.rs b/src/array/variable_size_binary.rs index d0cce901..a90d12bc 100644 --- a/src/array/variable_size_binary.rs +++ b/src/array/variable_size_binary.rs @@ -144,6 +144,40 @@ where } } +impl Index + for VariableSizeBinaryArray +where + ::Buffer: Index, +{ + type Item<'a> = Option<&'a [u8]> + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.0.is_valid_unchecked(index).then(|| { + let start: usize = self + .0 + .offsets + .data + .as_slice() + .index_unchecked(index) + .to_owned() + .try_into() + .expect("convert fail"); + let end: usize = self + .0 + .offsets + .data + .as_slice() + .index_unchecked(index + 1) + .to_owned() + .try_into() + .expect("convert fail"); + &self.0.data.0.as_slice()[start..end] + }) + } +} + impl Length for VariableSizeBinaryArray where diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index bc464f82..9473c303 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -410,7 +410,7 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "out of bounds")] fn as_ref_u8_out_of_bounds() { let bitmap = [false, true, false, true, false, true] .iter() @@ -440,7 +440,7 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "should be < len")] fn as_ref_bitslice_out_of_bounds() { let bitmap = [false, true, false, true, false, true] .iter() diff --git a/src/nullable.rs b/src/nullable.rs index 226da660..5813b1c4 100644 --- a/src/nullable.rs +++ b/src/nullable.rs @@ -23,13 +23,11 @@ pub struct Nullable { pub(crate) validity: Bitmap, } -impl Nullable { - /// Adds a validity bitmap to `T`, with all bits set. - pub(crate) fn wrap(data: T) -> Self - where - T: Length, - Bitmap: FromIterator, - { +impl From for Nullable +where + Bitmap: FromIterator, +{ + fn from(data: T) -> Self { let validity = Bitmap::new_valid(data.len()); Self { data, validity } } @@ -285,7 +283,7 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "should be < len")] fn index_checked() { let input = [Some(1), None]; let nullable = input.into_iter().collect::>>(); From b6316436287a64547e32942eef117fbba8283b14 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 6 Dec 2023 13:03:37 +0100 Subject: [PATCH 2/3] Add `IntoIterator` implementation, change `ArrayType` for `[T: FixedSize; N]` --- src/array/fixed_size_list.rs | 117 +++++++++++++++++++++++++++++++++-- src/array/mod.rs | 14 ++++- src/array/struct.rs | 2 +- 3 files changed, 124 insertions(+), 9 deletions(-) diff --git a/src/array/fixed_size_list.rs b/src/array/fixed_size_list.rs index aad3c4a0..1a0697e1 100644 --- a/src/array/fixed_size_list.rs +++ b/src/array/fixed_size_list.rs @@ -61,14 +61,31 @@ where } } -impl Extend - for FixedSizeListArray +impl Extend<[U; N]> + for FixedSizeListArray where - T: Validity, - >::Storage: Extend, + T: Extend, { - fn extend>(&mut self, iter: I) { - self.0.extend(iter); + fn extend>(&mut self, iter: I) { + self.0.extend(iter.into_iter().flatten()); + } +} + +impl Extend> + for FixedSizeListArray +where + [U; N]: Default, + T: Extend, + Bitmap: Extend, +{ + fn extend>>(&mut self, iter: I) { + self.0.data.extend( + iter.into_iter() + .inspect(|opt| { + self.0.validity.extend(iter::once(opt.is_some())); + }) + .flat_map(Option::unwrap_or_default), + ); } } @@ -157,6 +174,7 @@ where (start_index..end_index) .enumerate() .for_each(|(array_index, child_index)| { + // Here we need to index in the data data[array_index].write(self.0.data.index_unchecked(child_index)); }); // https://github.com/rust-lang/rust/issues/61956 @@ -167,6 +185,53 @@ where } } +/// An iterator over fixed-size lists in a [`FixedSizeListArray`]. +pub struct FixedSizeListIter<'a, const N: usize, T: Array, const NULLABLE: bool, Buffer: BufferType> +where + T: Validity, +{ + /// Reference to the array. + array: &'a FixedSizeListArray, + /// Current index. + index: usize, +} + +impl<'a, const N: usize, T: Array, const NULLABLE: bool, Buffer: BufferType> Iterator + for FixedSizeListIter<'a, N, T, NULLABLE, Buffer> +where + T: Validity, + FixedSizeListArray: Length + Index, +{ + type Item = as Index>::Item<'a>; + + fn next(&mut self) -> Option { + self.array + .index(self.index) + .into_iter() + .inspect(|_| { + self.index += 1; + }) + .next() + } +} + +impl<'a, const N: usize, T: Array, const NULLABLE: bool, Buffer: BufferType> IntoIterator + for &'a FixedSizeListArray +where + FixedSizeListArray: Index + Length, + T: Validity, +{ + type Item = as Index>::Item<'a>; + type IntoIter = FixedSizeListIter<'a, N, T, NULLABLE, Buffer>; + + fn into_iter(self) -> Self::IntoIter { + FixedSizeListIter { + array: self, + index: 0, + } + } +} + impl Length for FixedSizeListArray where @@ -263,4 +328,44 @@ mod tests { ); assert_eq!(array_nullable_string_nullable.index(3), None); } + + #[test] + fn into_iter() { + let input = [[1_u8, 2], [3, 4]]; + let array = input + .into_iter() + .collect::>>(); + assert_eq!(array.into_iter().collect::>(), [[&1, &2], [&3, &4]]); + + let input_string = [["hello", "world"], ["!", "!"]]; + let array_string = input_string + .into_iter() + .collect::>(); + assert_eq!(array_string.into_iter().collect::>(), input_string); + + let input_nullable_string = [Some(["hello", "world"]), None]; + let array_nullable_string = input_nullable_string + .into_iter() + .collect::>(); + assert_eq!( + array_nullable_string.into_iter().collect::>(), + input_nullable_string + ); + + let input_nullable_string_nullable = [ + Some([Some("hello"), None]), + None, + Some([None, Some("world")]), + ]; + let array_nullable_string_nullable = input_nullable_string_nullable + .into_iter() + .collect::, true>>( + ); + assert_eq!( + array_nullable_string_nullable + .into_iter() + .collect::>(), + input_nullable_string_nullable + ); + } } diff --git a/src/array/mod.rs b/src/array/mod.rs index 3b260874..77eb4ffb 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -114,11 +114,21 @@ impl_array_type!(Option<()>, NullArray<(), true, Buffer>); impl ArrayType for [T; N] { type Array = - FixedSizePrimitiveArray<[T; N], false, Buffer>; + FixedSizeListArray< + N, + ::Array, + false, + Buffer, + >; } impl ArrayType for Option<[T; N]> { type Array = - FixedSizePrimitiveArray<[T; N], true, Buffer>; + FixedSizeListArray< + N, + ::Array, + true, + Buffer, + >; } impl ArrayType for str { diff --git a/src/array/struct.rs b/src/array/struct.rs index 1eebd939..76ffe229 100644 --- a/src/array/struct.rs +++ b/src/array/struct.rs @@ -304,7 +304,7 @@ mod tests { assert_eq!(array.0.c.into_iter().collect::>(), &[(), (), (), ()]); assert_eq!( array.0.d.into_iter().collect::>(), - &[Some([1, 2]), Some([3, 4]), None, None] + &[Some([&1, &2]), Some([&3, &4]), None, None] ); assert_eq!( array.0.e.into_iter().collect::>(), From 63c6e8df8df779940fab20a1c418e275732ba5d2 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 6 Dec 2023 13:10:44 +0100 Subject: [PATCH 3/3] Add nested test --- src/array/fixed_size_list.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/array/fixed_size_list.rs b/src/array/fixed_size_list.rs index 1a0697e1..4e839b8a 100644 --- a/src/array/fixed_size_list.rs +++ b/src/array/fixed_size_list.rs @@ -367,5 +367,18 @@ mod tests { .collect::>(), input_nullable_string_nullable ); + + let input_nested = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 0], [0, 0]]]; + let array_nested = input_nested + .into_iter() + .collect::>>>(); + assert_eq!( + array_nested.into_iter().collect::>(), + [ + [[&1, &2], [&3, &4], [&5, &6]], + [[&7, &8], [&9, &0], [&0, &0]] + ] + ); + assert_eq!(array_nested.0 .0 .0, [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0]); } }