Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Correctly update child-offsets in GrowableUnion #1360

Merged
merged 6 commits into from
Jan 16, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/array/growable/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.offsets.len() - 1
}

fn as_arc(&mut self) -> Arc<dyn Array> {
self.to().arced()
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.values.len()
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {
);
}

#[inline]
fn next_offset(&self) -> usize {
self.key_values.len()
}

#[inline]
fn extend_validity(&mut self, additional: usize) {
self.key_values
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.values.len() / self.size
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/fixed_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ impl<'a> Growable<'a> for GrowableFixedSizeList<'a> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.values.next_offset() / self.size
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableList<'a, O> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.offsets.len() - 1
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
4 changes: 4 additions & 0 deletions src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ pub trait Growable<'a> {
/// Extends this [`Growable`] with null elements, disregarding the bound arrays
fn extend_validity(&mut self, additional: usize);

/// Get the offset of the next value to be added to this [`Growable`].
/// Used to remap offsets when building up a [`GrowableUnion`].
fn next_offset(&self) -> usize;
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved

/// Converts this [`Growable`] to an [`Arc<dyn Array>`], thereby finishing the mutation.
/// Self will be empty after such operation.
fn as_arc(&mut self) -> Arc<dyn Array> {
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ impl<'a> Growable<'a> for GrowableNull {
self.length += additional;
}

#[inline]
fn next_offset(&self) -> usize {
self.length
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(NullArray::new(self.data_type.clone(), self.length))
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.values.len()
}

#[inline]
fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
Expand Down
12 changes: 12 additions & 0 deletions src/array/growable/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ impl<'a> Growable<'a> for GrowableStruct<'a> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
// All children should have the same indexing, so just use the first
// one. If we don't have children, we might still have a validity
// array, so use that.
if let Some(child) = self.values.get(0) {
child.next_offset()
} else {
self.validity.len()
}
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
10 changes: 8 additions & 2 deletions src/array/growable/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ impl<'a> Growable<'a> for GrowableUnion<'a> {
if let Some(x) = self.offsets.as_mut() {
let offsets = &array.offsets().unwrap()[start..start + len];

x.extend(offsets);
// in a dense union, each slot has its own offset. We extend the fields accordingly.
for (&type_, &offset) in types.iter().zip(offsets.iter()) {
self.fields[type_ as usize].extend(index, offset as usize, 1);
let field = &mut self.fields[type_ as usize];
x.push(field.next_offset() as i32);
field.extend(index, offset as usize, 1);
}
} else {
// in a sparse union, every field has the same length => extend all fields equally
Expand All @@ -88,6 +89,11 @@ impl<'a> Growable<'a> for GrowableUnion<'a> {

fn extend_validity(&mut self, _additional: usize) {}

#[inline]
fn next_offset(&self) -> usize {
self.types.len()
}

fn as_arc(&mut self) -> Arc<dyn Array> {
self.to().arced()
}
Expand Down
5 changes: 5 additions & 0 deletions src/array/growable/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> {
self.validity.extend_constant(additional, false);
}

#[inline]
fn next_offset(&self) -> usize {
self.offsets.len() - 1
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Arc::new(self.to())
}
Expand Down
79 changes: 79 additions & 0 deletions tests/it/array/growable/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,82 @@ fn dense() -> Result<()> {

Ok(())
}

#[test]
fn complex_dense() -> Result<()> {
let fixed_size_type =
DataType::FixedSizeList(Box::new(Field::new("i", DataType::UInt16, true)), 3);

let fields = vec![
Field::new("a", DataType::Int32, true),
Field::new("b", DataType::Utf8, true),
Field::new("c", fixed_size_type.clone(), true),
];

let data_type = DataType::Union(fields, None, UnionMode::Dense);

// UnionArray[1, [11, 12, 13], abcd, [21, 22, 23], 2]
let types = vec![0, 2, 1, 2, 0].into();
let fields = vec![
Int32Array::from(&[Some(1), Some(2)]).boxed(),
Utf8Array::<i32>::from([Some("abcd")]).boxed(),
FixedSizeListArray::try_new(
fixed_size_type.clone(),
UInt16Array::from_iter([11, 12, 13, 21, 22, 23].into_iter().map(Some)).boxed(),
None,
)
.unwrap()
.boxed(),
];
let offsets = Some(vec![0, 0, 0, 1, 1].into());

let array1 = UnionArray::new(data_type.clone(), types, fields, offsets);

// UnionArray[[31, 32, 33], [41, 42, 43], ef, ghijk, 3]
let types = vec![2, 2, 1, 1, 0].into();
let fields = vec![
Int32Array::from(&[Some(3)]).boxed(),
Utf8Array::<i32>::from([Some("ef"), Some("ghijk")]).boxed(),
FixedSizeListArray::try_new(
fixed_size_type.clone(),
UInt16Array::from_iter([31, 32, 33, 41, 42, 43].into_iter().map(Some)).boxed(),
None,
)
.unwrap()
.boxed(),
];
let offsets = Some(vec![0, 1, 0, 1, 0].into());

let array2 = UnionArray::new(data_type.clone(), types, fields, offsets);

let mut a = GrowableUnion::new(vec![&array1, &array2], 10);

// Take the whole first array
a.extend(0, 0, 5);
// Skip the first value from the second array: [31, 32, 33]
a.extend(1, 1, 4);

let result: UnionArray = a.into();

// UnionArray[1, [11, 12, 13], abcd, [21, 22, 23], 2, [41, 42, 43], ef, ghijk, 3]
let types = vec![0, 2, 1, 2, 0, 2, 1, 1, 0].into();
let fields = vec![
Int32Array::from(&[Some(1), Some(2), Some(3)]).boxed(),
Utf8Array::<i32>::from([Some("abcd"), Some("ef"), Some("ghijk")]).boxed(),
FixedSizeListArray::try_new(
fixed_size_type,
UInt16Array::from_iter([11, 12, 13, 21, 22, 23, 41, 42, 43].into_iter().map(Some))
.boxed(),
None,
)
.unwrap()
.boxed(),
];
let offsets = Some(vec![0, 0, 0, 1, 1, 2, 1, 2, 2].into());

let expected = UnionArray::new(data_type, types, fields, offsets);

assert_eq!(expected, result);

Ok(())
}