From b6b7ae6e8cd2255a5ee56867b649815479d320b5 Mon Sep 17 00:00:00 2001
From: npenke <16369152+ncpenke@users.noreply.github.com>
Date: Sun, 6 Mar 2022 18:51:41 -0800
Subject: [PATCH] Bug fix in offset for sliced unions

When a `UnionArray` was sliced, only `types` was sliced; the optional
`offsets` buffer was cloned unchanged. Both slicing paths (checked and
unchecked) now slice `offsets` with the same `(offset, length)` as `types`.

When no `offsets` buffer is present (the tests construct sparse unions this
way), the slot lookup previously fell back to `index`; it now falls back to
`index + self.offset`.

The existing `slice` test is updated to expect sliced child arrays, and
iteration tests are added for sparse and dense unions, both unsliced and
sliced.

---
 src/array/union/mod.rs  |  12 ++-
 tests/it/array/union.rs | 176 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 182 insertions(+), 6 deletions(-)

diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs
index 47277bc9b37..40825705ef8 100644
--- a/src/array/union/mod.rs
+++ b/src/array/union/mod.rs
@@ -191,7 +191,10 @@ impl UnionArray {
             fields: self.fields.clone(),
             fields_hash: self.fields_hash.clone(),
             types: self.types.clone().slice(offset, length),
-            offsets: self.offsets.clone(),
+            offsets: self
+                .offsets
+                .clone()
+                .map(|offsets| offsets.slice(offset, length)),
             offset: self.offset + offset,
         }
     }
@@ -208,7 +211,10 @@ impl UnionArray {
             fields: self.fields.clone(),
             fields_hash: self.fields_hash.clone(),
             types: self.types.clone().slice_unchecked(offset, length),
-            offsets: self.offsets.clone(),
+            offsets: self
+                .offsets
+                .clone()
+                .map(|offsets| offsets.slice_unchecked(offset, length)),
             offset: self.offset + offset,
         }
     }
@@ -249,7 +255,7 @@ impl UnionArray {
         self.offsets()
             .as_ref()
             .map(|x| x[index] as usize)
-            .unwrap_or(index)
+            .unwrap_or(index + self.offset)
     }
 
     /// Returns the index and slot of the field to select from `self.fields`.
diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs
index f4eef0da9ad..4066c0ad9b1 100644
--- a/tests/it/array/union.rs
+++ b/tests/it/array/union.rs
@@ -1,6 +1,12 @@
 use std::sync::Arc;
 
-use arrow2::{array::*, buffer::Buffer, datatypes::*, error::Result};
+use arrow2::{
+    array::*,
+    buffer::Buffer,
+    datatypes::*,
+    error::Result,
+    scalar::{PrimitiveScalar, Utf8Scalar},
+};
 
 #[test]
 fn debug() -> Result<()> {
@@ -39,9 +45,173 @@ fn slice() -> Result<()> {
 
     let result = array.slice(1, 2);
 
-    let types = Buffer::from_slice([0, 1]);
-    let expected = UnionArray::from_data(data_type, types, fields, None);
+    let sliced_types = Buffer::from_slice([0, 1]);
+    let sliced_fields = vec![
+        Arc::new(Int32Array::from(&[None, Some(2)])) as Arc<dyn Array>,
+        Arc::new(Utf8Array::<i32>::from(&[Some("b"), Some("c")])) as Arc<dyn Array>,
+    ];
+    let expected = UnionArray::from_data(data_type, sliced_types, sliced_fields, None);
 
     assert_eq!(expected, result);
     Ok(())
 }
+
+#[test]
+fn iter_sparse() -> Result<()> {
+    let fields = vec![
+        Field::new("a", DataType::Int32, true),
+        Field::new("b", DataType::Utf8, true),
+    ];
+    let data_type = DataType::Union(fields, None, UnionMode::Sparse);
+    let types = Buffer::from_slice([0, 0, 1]);
+    let fields = vec![
+        Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc<dyn Array>,
+        Arc::new(Utf8Array::<i32>::from(&[Some("a"), Some("b"), Some("c")])) as Arc<dyn Array>,
+    ];
+
+    let array = UnionArray::from_data(data_type, types, fields.clone(), None);
+    let mut iter = array.iter();
+
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        Some(1)
+    );
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        None
+    );
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Utf8Scalar<i32>>()
+            .unwrap()
+            .value(),
+        Some("c")
+    );
+    assert_eq!(iter.next(), None);
+
+    Ok(())
+}
+
+#[test]
+fn iter_dense() -> Result<()> {
+    let fields = vec![
+        Field::new("a", DataType::Int32, true),
+        Field::new("b", DataType::Utf8, true),
+    ];
+    let data_type = DataType::Union(fields, None, UnionMode::Dense);
+    let types = Buffer::from_slice([0, 0, 1]);
+    let offsets = Buffer::<i32>::from_slice([0, 1, 0]);
+    let fields = vec![
+        Arc::new(Int32Array::from(&[Some(1), None])) as Arc<dyn Array>,
+        Arc::new(Utf8Array::<i32>::from(&[Some("c")])) as Arc<dyn Array>,
+    ];
+
+    let array = UnionArray::from_data(data_type, types, fields.clone(), Some(offsets));
+    let mut iter = array.iter();
+
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        Some(1)
+    );
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        None
+    );
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Utf8Scalar<i32>>()
+            .unwrap()
+            .value(),
+        Some("c")
+    );
+    assert_eq!(iter.next(), None);
+
+    Ok(())
+}
+
+#[test]
+fn iter_sparse_slice() -> Result<()> {
+    let fields = vec![
+        Field::new("a", DataType::Int32, true),
+        Field::new("b", DataType::Utf8, true),
+    ];
+    let data_type = DataType::Union(fields, None, UnionMode::Sparse);
+    let types = Buffer::from_slice([0, 0, 1]);
+    let fields = vec![
+        Arc::new(Int32Array::from(&[Some(1), Some(3), Some(2)])) as Arc<dyn Array>,
+        Arc::new(Utf8Array::<i32>::from(&[Some("a"), Some("b"), Some("c")])) as Arc<dyn Array>,
+    ];
+
+    let array = UnionArray::from_data(data_type, types, fields.clone(), None);
+    let array_slice = array.slice(1, 1);
+    let mut iter = array_slice.iter();
+
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        Some(3)
+    );
+    assert_eq!(iter.next(), None);
+
+    Ok(())
+}
+
+#[test]
+fn iter_dense_slice() -> Result<()> {
+    let fields = vec![
+        Field::new("a", DataType::Int32, true),
+        Field::new("b", DataType::Utf8, true),
+    ];
+    let data_type = DataType::Union(fields, None, UnionMode::Dense);
+    let types = Buffer::from_slice([0, 0, 1]);
+    let offsets = Buffer::<i32>::from_slice([0, 1, 0]);
+    let fields = vec![
+        Arc::new(Int32Array::from(&[Some(1), Some(3)])) as Arc<dyn Array>,
+        Arc::new(Utf8Array::<i32>::from(&[Some("c")])) as Arc<dyn Array>,
+    ];
+
+    let array = UnionArray::from_data(data_type, types, fields.clone(), Some(offsets));
+    let array_slice = array.slice(1, 1);
+    let mut iter = array_slice.iter();
+
+    assert_eq!(
+        iter.next()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveScalar<i32>>()
+            .unwrap()
+            .value(),
+        Some(3)
+    );
+    assert_eq!(iter.next(), None);
+
+    Ok(())
+}