From a7df70b71d7ddd77a2a1430efaca18a79f2a31b7 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Mon, 16 Jan 2023 17:06:23 -0500 Subject: [PATCH] Correctly update child-offsets in `GrowableUnion` (#1360) --- src/array/growable/binary.rs | 5 ++ src/array/growable/boolean.rs | 5 ++ src/array/growable/dictionary.rs | 5 ++ src/array/growable/fixed_binary.rs | 5 ++ src/array/growable/fixed_size_list.rs | 5 ++ src/array/growable/list.rs | 5 ++ src/array/growable/mod.rs | 3 + src/array/growable/null.rs | 5 ++ src/array/growable/primitive.rs | 5 ++ src/array/growable/structure.rs | 12 ++++ src/array/growable/union.rs | 14 +++- src/array/growable/utf8.rs | 5 ++ tests/it/array/growable/binary.rs | 5 ++ tests/it/array/growable/boolean.rs | 1 + tests/it/array/growable/dictionary.rs | 2 + tests/it/array/growable/fixed_binary.rs | 5 ++ tests/it/array/growable/fixed_size_list.rs | 3 + tests/it/array/growable/list.rs | 5 ++ tests/it/array/growable/null.rs | 1 + tests/it/array/growable/primitive.rs | 5 ++ tests/it/array/growable/struct_.rs | 4 ++ tests/it/array/growable/union.rs | 83 ++++++++++++++++++++++ tests/it/array/growable/utf8.rs | 4 ++ 23 files changed, 190 insertions(+), 2 deletions(-) diff --git a/src/array/growable/binary.rs b/src/array/growable/binary.rs index 5b43c9fcdf4..53ff0ae4feb 100644 --- a/src/array/growable/binary.rs +++ b/src/array/growable/binary.rs @@ -81,6 +81,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.offsets.len() - 1 + } + fn as_arc(&mut self) -> Arc { self.to().arced() } diff --git a/src/array/growable/boolean.rs b/src/array/growable/boolean.rs index 994f5b4e92e..a9fb52ef3ed 100644 --- a/src/array/growable/boolean.rs +++ b/src/array/growable/boolean.rs @@ -71,6 +71,11 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.values.len() + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/src/array/growable/dictionary.rs b/src/array/growable/dictionary.rs index 930f4268f28..f550304c852 100644 --- a/src/array/growable/dictionary.rs +++ b/src/array/growable/dictionary.rs @@ -132,6 +132,11 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { ); } + #[inline] + fn len(&self) -> usize { + self.key_values.len() + } + #[inline] fn extend_validity(&mut self, additional: usize) { self.key_values diff --git a/src/array/growable/fixed_binary.rs b/src/array/growable/fixed_binary.rs index d096ff5a51f..763bd59c817 100644 --- a/src/array/growable/fixed_binary.rs +++ b/src/array/growable/fixed_binary.rs @@ -78,6 +78,11 @@ impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.values.len() / self.size + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/src/array/growable/fixed_size_list.rs b/src/array/growable/fixed_size_list.rs index 6fb88233a77..a70695f4554 100644 --- a/src/array/growable/fixed_size_list.rs +++ b/src/array/growable/fixed_size_list.rs @@ -85,6 +85,11 @@ impl<'a> Growable<'a> for GrowableFixedSizeList<'a> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.values.len() / self.size + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/src/array/growable/list.rs b/src/array/growable/list.rs index bc78a2d8e86..c0abb26dd72 100644 --- a/src/array/growable/list.rs +++ b/src/array/growable/list.rs @@ -97,6 +97,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableList<'a, O> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.offsets.len() - 1 + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/src/array/growable/mod.rs b/src/array/growable/mod.rs index eb79f7fed97..9ac97d42578 100644 --- a/src/array/growable/mod.rs +++ b/src/array/growable/mod.rs @@ -43,6 +43,9 @@ pub trait Growable<'a> { /// Extends this [`Growable`] with null elements, disregarding the bound arrays fn extend_validity(&mut self, additional: usize); + /// The current length of the [`Growable`]. + fn len(&self) -> usize; + /// Converts this [`Growable`] to an [`Arc`], thereby finishing the mutation. /// Self will be empty after such operation. fn as_arc(&mut self) -> Arc { diff --git a/src/array/growable/null.rs b/src/array/growable/null.rs index 208a389a7fc..ac97c478282 100644 --- a/src/array/growable/null.rs +++ b/src/array/growable/null.rs @@ -38,6 +38,11 @@ impl<'a> Growable<'a> for GrowableNull { self.length += additional; } + #[inline] + fn len(&self) -> usize { + self.length + } + fn as_arc(&mut self) -> Arc { Arc::new(NullArray::new(self.data_type.clone(), self.length)) } diff --git a/src/array/growable/primitive.rs b/src/array/growable/primitive.rs index 19a949e0f9d..e443756cb95 100644 --- a/src/array/growable/primitive.rs +++ b/src/array/growable/primitive.rs @@ -82,6 +82,11 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.values.len() + } + #[inline] fn as_arc(&mut self) -> Arc { Arc::new(self.to()) diff --git a/src/array/growable/structure.rs b/src/array/growable/structure.rs index 8e91cf03ce0..b1242e08a4f 100644 --- a/src/array/growable/structure.rs +++ b/src/array/growable/structure.rs @@ -104,6 +104,18 @@ impl<'a> Growable<'a> for GrowableStruct<'a> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + // All children should have the same indexing, so just use the first + // one. If we don't have children, we might still have a validity + // array, so use that. + if let Some(child) = self.values.get(0) { + child.len() + } else { + self.validity.len() + } + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/src/array/growable/union.rs b/src/array/growable/union.rs index 8356c228a4b..cccde2ee960 100644 --- a/src/array/growable/union.rs +++ b/src/array/growable/union.rs @@ -73,10 +73,15 @@ impl<'a> Growable<'a> for GrowableUnion<'a> { if let Some(x) = self.offsets.as_mut() { let offsets = &array.offsets().unwrap()[start..start + len]; - x.extend(offsets); // in a dense union, each slot has its own offset. We extend the fields accordingly. for (&type_, &offset) in types.iter().zip(offsets.iter()) { - self.fields[type_ as usize].extend(index, offset as usize, 1); + let field = &mut self.fields[type_ as usize]; + // The offset for the element that is about to be extended is the current length + // of the child field of the corresponding type. Note that this may be very + // different than the original offset from the array we are extending from as + // it is a function of the previous extensions to this child. + x.push(field.len() as i32); + field.extend(index, offset as usize, 1); } } else { // in a sparse union, every field has the same length => extend all fields equally @@ -88,6 +93,11 @@ impl<'a> Growable<'a> for GrowableUnion<'a> { fn extend_validity(&mut self, _additional: usize) {} + #[inline] + fn len(&self) -> usize { + self.types.len() + } + fn as_arc(&mut self) -> Arc { self.to().arced() } diff --git a/src/array/growable/utf8.rs b/src/array/growable/utf8.rs index 5e901577901..cd71da0a264 100644 --- a/src/array/growable/utf8.rs +++ b/src/array/growable/utf8.rs @@ -88,6 +88,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> { self.validity.extend_constant(additional, false); } + #[inline] + fn len(&self) -> usize { + self.offsets.len() - 1 + } + fn as_arc(&mut self) -> Arc { Arc::new(self.to()) } diff --git a/tests/it/array/growable/binary.rs b/tests/it/array/growable/binary.rs index 3ca9d80deb2..a54a8f9f805 100644 --- a/tests/it/array/growable/binary.rs +++ b/tests/it/array/growable/binary.rs @@ -10,6 +10,7 @@ fn no_offsets() { let mut a = GrowableBinary::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: BinaryArray = a.into(); @@ -27,6 +28,7 @@ fn with_offsets() { let mut a = GrowableBinary::new(vec![&array], false, 0); a.extend(0, 0, 3); + assert_eq!(a.len(), 3); let result: BinaryArray = a.into(); @@ -42,6 +44,7 @@ fn test_string_offsets() { let mut a = GrowableBinary::new(vec![&array], false, 0); a.extend(0, 0, 3); + assert_eq!(a.len(), 3); let result: BinaryArray = a.into(); @@ -58,6 +61,7 @@ fn test_multiple_with_validity() { a.extend(0, 0, 2); a.extend(1, 0, 2); + assert_eq!(a.len(), 4); let result: BinaryArray = a.into(); @@ -74,6 +78,7 @@ fn test_string_null_offset_validity() { a.extend(0, 1, 2); a.extend_validity(1); + assert_eq!(a.len(), 3); let result: BinaryArray = a.into(); diff --git a/tests/it/array/growable/boolean.rs b/tests/it/array/growable/boolean.rs index 780c677cce4..b52110b5d94 100644 --- a/tests/it/array/growable/boolean.rs +++ b/tests/it/array/growable/boolean.rs @@ -8,6 +8,7 @@ fn test_bool() { let mut a = GrowableBoolean::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: BooleanArray = a.into(); diff --git a/tests/it/array/growable/dictionary.rs b/tests/it/array/growable/dictionary.rs index 6d7a34dfffd..d4c75922de2 100644 --- a/tests/it/array/growable/dictionary.rs +++ b/tests/it/array/growable/dictionary.rs @@ -21,6 +21,7 @@ fn test_single() -> Result<()> { let mut growable = GrowableDictionary::new(&[&array], false, 0); growable.extend(0, 1, 2); + assert_eq!(growable.len(), 2); let result: DictionaryArray = growable.into(); @@ -56,6 +57,7 @@ fn test_multi() -> Result<()> { growable.extend(0, 1, 2); growable.extend(1, 1, 2); + assert_eq!(growable.len(), 4); let result: DictionaryArray = growable.into(); diff --git a/tests/it/array/growable/fixed_binary.rs b/tests/it/array/growable/fixed_binary.rs index 6c213432176..a14aecaef3b 100644 --- a/tests/it/array/growable/fixed_binary.rs +++ b/tests/it/array/growable/fixed_binary.rs @@ -12,6 +12,7 @@ fn basic() { let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: FixedSizeBinaryArray = a.into(); @@ -30,6 +31,7 @@ fn offsets() { let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); a.extend(0, 0, 3); + assert_eq!(a.len(), 3); let result: FixedSizeBinaryArray = a.into(); @@ -46,6 +48,7 @@ fn multiple_with_validity() { a.extend(0, 0, 2); a.extend(1, 0, 2); + assert_eq!(a.len(), 4); let result: FixedSizeBinaryArray = a.into(); @@ -63,6 +66,7 @@ fn null_offset_validity() { a.extend(0, 1, 2); a.extend_validity(1); + assert_eq!(a.len(), 3); let result: FixedSizeBinaryArray = a.into(); @@ -81,6 +85,7 @@ fn sized_offsets() { a.extend(0, 1, 1); a.extend(0, 0, 1); + assert_eq!(a.len(), 2); let result: FixedSizeBinaryArray = a.into(); diff --git a/tests/it/array/growable/fixed_size_list.rs b/tests/it/array/growable/fixed_size_list.rs index 692d75150d2..10474befdc0 100644 --- a/tests/it/array/growable/fixed_size_list.rs +++ b/tests/it/array/growable/fixed_size_list.rs @@ -21,6 +21,7 @@ fn basic() { let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); a.extend(0, 0, 1); + assert_eq!(a.len(), 1); let result: FixedSizeListArray = a.into(); @@ -42,6 +43,7 @@ fn null_offset() { let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); a.extend(0, 1, 1); + assert_eq!(a.len(), 1); let result: FixedSizeListArray = a.into(); @@ -70,6 +72,7 @@ fn test_from_two_lists() { let mut a = GrowableFixedSizeList::new(vec![&array_1, &array_2], false, 6); a.extend(0, 0, 2); a.extend(1, 1, 1); + assert_eq!(a.len(), 3); let result: FixedSizeListArray = a.into(); diff --git a/tests/it/array/growable/list.rs b/tests/it/array/growable/list.rs index 9aa56b109aa..f40dbdc4ecf 100644 --- a/tests/it/array/growable/list.rs +++ b/tests/it/array/growable/list.rs @@ -33,6 +33,7 @@ fn extension() { let mut a = GrowableList::new(vec![&array_ext], false, 0); a.extend(0, 0, 1); + assert_eq!(a.len(), 1); let result: ListArray = a.into(); assert_eq!(array_ext.data_type(), result.data_type()); @@ -51,6 +52,7 @@ fn basic() { let mut a = GrowableList::new(vec![&array], false, 0); a.extend(0, 0, 1); + assert_eq!(a.len(), 1); let result: ListArray = a.into(); @@ -72,6 +74,7 @@ fn null_offset() { let mut a = GrowableList::new(vec![&array], false, 0); a.extend(0, 1, 1); + assert_eq!(a.len(), 1); let result: ListArray = a.into(); @@ -93,6 +96,7 @@ fn null_offsets() { let mut a = GrowableList::new(vec![&array], false, 0); a.extend(0, 1, 1); + assert_eq!(a.len(), 1); let result: ListArray = a.into(); @@ -121,6 +125,7 @@ fn test_from_two_lists() { let mut a = GrowableList::new(vec![&array_1, &array_2], false, 6); a.extend(0, 0, 2); a.extend(1, 1, 1); + assert_eq!(a.len(), 3); let result: ListArray = a.into(); diff --git a/tests/it/array/growable/null.rs b/tests/it/array/growable/null.rs index 7e26e71cdda..1298f3f5686 100644 --- a/tests/it/array/growable/null.rs +++ b/tests/it/array/growable/null.rs @@ -12,6 +12,7 @@ fn null() { mutable.extend(0, 1, 2); mutable.extend(1, 0, 1); + assert_eq!(mutable.len(), 3); let result: NullArray = mutable.into(); diff --git a/tests/it/array/growable/primitive.rs b/tests/it/array/growable/primitive.rs index aec18582063..305a2affcc9 100644 --- a/tests/it/array/growable/primitive.rs +++ b/tests/it/array/growable/primitive.rs @@ -9,6 +9,7 @@ fn basics() { let b = PrimitiveArray::::from(vec![Some(1), Some(2), Some(3)]); let mut a = GrowablePrimitive::new(vec![&b], false, 3); a.extend(0, 0, 2); + assert_eq!(a.len(), 2); let result: PrimitiveArray = a.into(); let expected = PrimitiveArray::::from(vec![Some(1), Some(2)]); assert_eq!(result, expected); @@ -21,6 +22,7 @@ fn offset() { let b = b.slice(1, 2); let mut a = GrowablePrimitive::new(vec![&b], false, 2); a.extend(0, 0, 2); + assert_eq!(a.len(), 2); let result: PrimitiveArray = a.into(); let expected = PrimitiveArray::::from(vec![Some(2), Some(3)]); assert_eq!(result, expected); @@ -33,6 +35,7 @@ fn null_offset() { let b = b.slice(1, 2); let mut a = GrowablePrimitive::new(vec![&b], false, 2); a.extend(0, 0, 2); + assert_eq!(a.len(), 2); let result: PrimitiveArray = a.into(); let expected = PrimitiveArray::::from(vec![None, Some(3)]); assert_eq!(result, expected); @@ -46,6 +49,7 @@ fn null_offset_validity() { a.extend(0, 0, 2); a.extend_validity(3); a.extend(0, 1, 1); + assert_eq!(a.len(), 6); let result: PrimitiveArray = a.into(); let expected = PrimitiveArray::::from(&[Some(2), Some(3), None, None, None, Some(3)]); assert_eq!(result, expected); @@ -58,6 +62,7 @@ fn joining_arrays() { let mut a = GrowablePrimitive::new(vec![&b, &c], false, 4); a.extend(0, 0, 2); a.extend(1, 1, 2); + assert_eq!(a.len(), 4); let result: PrimitiveArray = a.into(); let expected = PrimitiveArray::::from(&[Some(1), Some(2), Some(5), Some(6)]); diff --git a/tests/it/array/growable/struct_.rs b/tests/it/array/growable/struct_.rs index e31058e112f..30ecb45ad27 100644 --- a/tests/it/array/growable/struct_.rs +++ b/tests/it/array/growable/struct_.rs @@ -36,6 +36,7 @@ fn basic() { let mut a = GrowableStruct::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: StructArray = a.into(); let expected = StructArray::new( @@ -55,6 +56,7 @@ fn offset() { let mut a = GrowableStruct::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: StructArray = a.into(); let expected = StructArray::new( @@ -79,6 +81,7 @@ fn nulls() { let mut a = GrowableStruct::new(vec![&array], false, 0); a.extend(0, 1, 2); + assert_eq!(a.len(), 2); let result: StructArray = a.into(); let expected = StructArray::new( @@ -101,6 +104,7 @@ fn many() { mutable.extend(0, 1, 2); mutable.extend(1, 0, 2); mutable.extend_validity(1); + assert_eq!(mutable.len(), 5); let result = mutable.as_box(); let expected_string: Box = Box::new(Utf8Array::::from([ diff --git a/tests/it/array/growable/union.rs b/tests/it/array/growable/union.rs index 607ef13d3e5..0d69e2a2a82 100644 --- a/tests/it/array/growable/union.rs +++ b/tests/it/array/growable/union.rs @@ -26,6 +26,8 @@ fn sparse() -> Result<()> { let mut a = GrowableUnion::new(vec![&array], 10); a.extend(0, index, length); + assert_eq!(a.len(), length); + let expected = array.slice(index, length); let result: UnionArray = a.into(); @@ -58,6 +60,7 @@ fn dense() -> Result<()> { let mut a = GrowableUnion::new(vec![&array], 10); a.extend(0, index, length); + assert_eq!(a.len(), length); let expected = array.slice(index, length); let result: UnionArray = a.into(); @@ -68,3 +71,83 @@ fn dense() -> Result<()> { Ok(()) } + +#[test] +fn complex_dense() -> Result<()> { + let fixed_size_type = + DataType::FixedSizeList(Box::new(Field::new("i", DataType::UInt16, true)), 3); + + let fields = vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + Field::new("c", fixed_size_type.clone(), true), + ]; + + let data_type = DataType::Union(fields, None, UnionMode::Dense); + + // UnionArray[1, [11, 12, 13], abcd, [21, 22, 23], 2] + let types = vec![0, 2, 1, 2, 0].into(); + let fields = vec![ + Int32Array::from(&[Some(1), Some(2)]).boxed(), + Utf8Array::::from([Some("abcd")]).boxed(), + FixedSizeListArray::try_new( + fixed_size_type.clone(), + UInt16Array::from_iter([11, 12, 13, 21, 22, 23].into_iter().map(Some)).boxed(), + None, + ) + .unwrap() + .boxed(), + ]; + let offsets = Some(vec![0, 0, 0, 1, 1].into()); + + let array1 = UnionArray::new(data_type.clone(), types, fields, offsets); + + // UnionArray[[31, 32, 33], [41, 42, 43], ef, ghijk, 3] + let types = vec![2, 2, 1, 1, 0].into(); + let fields = vec![ + Int32Array::from(&[Some(3)]).boxed(), + Utf8Array::::from([Some("ef"), Some("ghijk")]).boxed(), + FixedSizeListArray::try_new( + fixed_size_type.clone(), + UInt16Array::from_iter([31, 32, 33, 41, 42, 43].into_iter().map(Some)).boxed(), + None, + ) + .unwrap() + .boxed(), + ]; + let offsets = Some(vec![0, 1, 0, 1, 0].into()); + + let array2 = UnionArray::new(data_type.clone(), types, fields, offsets); + + let mut a = GrowableUnion::new(vec![&array1, &array2], 10); + + // Take the whole first array + a.extend(0, 0, 5); + // Skip the first value from the second array: [31, 32, 33] + a.extend(1, 1, 4); + assert_eq!(a.len(), 9); + + let result: UnionArray = a.into(); + + // UnionArray[1, [11, 12, 13], abcd, [21, 22, 23], 2, [41, 42, 43], ef, ghijk, 3] + let types = vec![0, 2, 1, 2, 0, 2, 1, 1, 0].into(); + let fields = vec![ + Int32Array::from(&[Some(1), Some(2), Some(3)]).boxed(), + Utf8Array::::from([Some("abcd"), Some("ef"), Some("ghijk")]).boxed(), + FixedSizeListArray::try_new( + fixed_size_type, + UInt16Array::from_iter([11, 12, 13, 21, 22, 23, 41, 42, 43].into_iter().map(Some)) + .boxed(), + None, + ) + .unwrap() + .boxed(), + ]; + let offsets = Some(vec![0, 0, 0, 1, 1, 2, 1, 2, 2].into()); + + let expected = UnionArray::new(data_type, types, fields, offsets); + + assert_eq!(expected, result); + + Ok(()) +} diff --git a/tests/it/array/growable/utf8.rs b/tests/it/array/growable/utf8.rs index 5e4308b5778..14221dc97fd 100644 --- a/tests/it/array/growable/utf8.rs +++ b/tests/it/array/growable/utf8.rs @@ -28,6 +28,7 @@ fn offsets() { let mut a = GrowableUtf8::new(vec![&array], false, 0); a.extend(0, 0, 3); + assert_eq!(a.len(), 3); let result: Utf8Array = a.into(); @@ -43,6 +44,7 @@ fn offsets2() { let mut a = GrowableUtf8::new(vec![&array], false, 0); a.extend(0, 0, 3); + assert_eq!(a.len(), 3); let result: Utf8Array = a.into(); @@ -59,6 +61,7 @@ fn multiple_with_validity() { a.extend(0, 0, 2); a.extend(1, 0, 2); + assert_eq!(a.len(), 4); let result: Utf8Array = a.into(); @@ -75,6 +78,7 @@ fn null_offset_validity() { a.extend(0, 1, 2); a.extend_validity(1); + assert_eq!(a.len(), 3); let result: Utf8Array = a.into();