diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index d5099a48205..068e595e687 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -169,15 +169,7 @@ impl MutableBooleanArray { P: std::borrow::Borrow, I: TrustedLen>, { - let (validity, values) = unsafe { trusted_len_unzip(iterator) }; - - let validity = if validity.null_count() > 0 { - Some(validity) - } else { - None - }; - - Self::from_data(values, validity) + unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } /// Creates a [`BooleanArray`] from an falible iterator of trusted length. @@ -210,15 +202,7 @@ impl MutableBooleanArray { P: std::borrow::Borrow, I: TrustedLen, E>>, { - let (validity, values) = unsafe { try_trusted_len_unzip(iterator)? }; - - let validity = if validity.null_count() > 0 { - Some(validity) - } else { - None - }; - - Ok(Self::from_data(values, validity)) + unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } } } diff --git a/src/array/mod.rs b/src/array/mod.rs index 0ad9886275d..4d1366dbf21 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -258,7 +258,8 @@ pub fn new_empty_array(data_type: DataType) -> Box { } /// Creates a new [`Array`] of [`DataType`] `data_type` and `length`. -/// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`]. +/// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`] +/// for all types except Union, which does not have a validity. pub fn new_null_array(data_type: DataType, length: usize) -> Box { match data_type { DataType::Null => Box::new(NullArray::new_null(length)), diff --git a/src/array/primitive/iterator.rs b/src/array/primitive/iterator.rs index 2d878baa595..5d5049e34fc 100644 --- a/src/array/primitive/iterator.rs +++ b/src/array/primitive/iterator.rs @@ -3,7 +3,8 @@ use crate::{ types::NativeType, }; -use super::PrimitiveArray; +use super::super::MutableArray; +use super::{MutablePrimitiveArray, PrimitiveArray}; impl<'a, T: NativeType> IntoIterator for &'a PrimitiveArray { type Item = Option<&'a T>; @@ -25,3 +26,20 @@ impl<'a, T: NativeType> PrimitiveArray { ) } } + +impl<'a, T: NativeType> MutablePrimitiveArray { + /// Returns an iterator over `Option` + #[inline] + pub fn iter(&'a self) -> ZipValidity<'a, &'a T, std::slice::Iter<'a, T>> { + zip_validity( + self.values().iter(), + self.validity().as_ref().map(|x| x.iter()), + ) + } + + /// Returns an iterator of `bool` + #[inline] + pub fn values_iter(&'a self) -> std::slice::Iter<'a, T> { + self.values().iter() + } +} diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index 2e80840249b..a4f771ebe39 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -332,13 +332,7 @@ impl MutablePrimitiveArray { P: std::borrow::Borrow, I: TrustedLen>, { - let (validity, values) = unsafe { trusted_len_unzip(iterator) }; - - Self { - data_type: T::DATA_TYPE, - values, - validity, - } + unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length. @@ -371,13 +365,7 @@ impl MutablePrimitiveArray { P: std::borrow::Borrow, I: TrustedLen, E>>, { - let (validity, values) = unsafe { try_trusted_len_unzip(iterator) }?; - - Ok(Self { - data_type: T::DATA_TYPE, - values, - validity, - }) + unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } } /// Creates a new [`MutablePrimitiveArray`] out an iterator over values @@ -543,3 +531,9 @@ where Ok((validity, buffer)) } + +impl PartialEq for MutablePrimitiveArray { + fn eq(&self, other: &Self) -> bool { + self.iter().eq(other.iter()) + } +} diff --git a/src/array/specification.rs b/src/array/specification.rs index 2db3d76293e..e0a09757a54 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -2,7 +2,7 @@ use std::convert::TryFrom; use num::Num; -use crate::{buffer::Buffer, types::Index}; +use crate::types::Index; /// Trait describing types that can be used as offsets as per Arrow specification. /// This trait is only implemented for `i32` and `i64`, the two sizes part of the specification. @@ -51,15 +51,13 @@ unsafe impl Offset for i64 { } #[inline] -pub fn check_offsets(offsets: &Buffer, values_len: usize) -> usize { +pub fn check_offsets(offsets: &[O], values_len: usize) -> usize { assert!( !offsets.is_empty(), "The length of the offset buffer must be larger than 1" ); let len = offsets.len() - 1; - let offsets = offsets.as_slice(); - let last_offset = offsets[len]; let last_offset = last_offset.to_usize(); @@ -71,9 +69,9 @@ pub fn check_offsets(offsets: &Buffer, values_len: usize) -> usize } #[inline] -pub fn check_offsets_and_utf8(offsets: &Buffer, values: &Buffer) -> usize { +pub fn check_offsets_and_utf8(offsets: &[O], values: &[u8]) -> usize { let len = check_offsets(offsets, values.len()); - offsets.as_slice().windows(2).for_each(|window| { + offsets.windows(2).for_each(|window| { let start = window[0].to_usize(); let end = window[1].to_usize(); assert!(end <= values.len()); diff --git a/src/array/utf8/from.rs b/src/array/utf8/from.rs index faf9125ed07..8c8f75468ba 100644 --- a/src/array/utf8/from.rs +++ b/src/array/utf8/from.rs @@ -1,11 +1,7 @@ use std::iter::FromIterator; +use crate::array::Offset; use crate::trusted_len::TrustedLen; -use crate::{ - array::Offset, - bitmap::{Bitmap, MutableBitmap}, - buffer::{Buffer, MutableBuffer}, -}; use super::{MutableUtf8Array, Utf8Array}; @@ -28,15 +24,12 @@ impl Utf8Array { pub fn from_trusted_len_values_iter, I: TrustedLen>( iterator: I, ) -> Self { - let (offsets, values) = unsafe { trusted_len_values_iter(iterator) }; - Self::from_data(offsets, values, None) + MutableUtf8Array::::from_trusted_len_values_iter(iterator).into() } /// Creates a new [`Utf8Array`] from a [`Iterator`] of `&str`. - pub fn from_iter_values, I: IntoIterator>(iter: I) -> Self { - let iterator = iter.into_iter(); - let (offsets, values) = values_iter(iterator); - Self::from_data(offsets, values, None) + pub fn from_iter_values, I: Iterator>(iterator: I) -> Self { + MutableUtf8Array::::from_iter_values(iterator).into() } } @@ -51,10 +44,7 @@ impl Utf8Array { P: AsRef, I: Iterator>, { - let (validity, offsets, values) = trusted_len_unzip(iterator); - - // soundness: P is `str` - Self::from_data_unchecked(offsets, values, validity) + MutableUtf8Array::::from_trusted_len_iter_unchecked(iterator).into() } /// Creates a [`Utf8Array`] from an iterator of trusted length. @@ -68,183 +58,29 @@ impl Utf8Array { unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } - /// Creates a [`PrimitiveArray`] from an falible iterator of trusted length. + /// Creates a [`Utf8Array`] from an falible iterator of trusted length. /// # Safety /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). /// I.e. that `size_hint().1` correctly reports its length. #[inline] - pub unsafe fn try_from_trusted_len_iter(iter: I) -> Result + pub unsafe fn try_from_trusted_len_iter_unchecked(iterator: I) -> Result where P: AsRef, I: IntoIterator, E>>, { - let iterator = iter.into_iter(); - - let (validity, offsets, values) = try_trusted_len_unzip(iterator)?; - - // soundness: P is `str` - Ok(Self::from_data_unchecked(offsets, values, validity)) - } -} - -/// Creates [`Bitmap`] and two [`Buffer`]s from an iterator of `Option`. -/// The first buffer corresponds to a offset buffer, the second one -/// corresponds to a values buffer. -/// # Safety -/// The caller must ensure that `iterator` is `TrustedLen`. -#[inline] -pub(crate) unsafe fn trusted_len_unzip( - iterator: I, -) -> (Option, Buffer, Buffer) -where - O: Offset, - P: AsRef, - I: Iterator>, -{ - let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); - - let mut null = MutableBitmap::with_capacity(len); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); - - let mut length = O::default(); - let mut dst = offsets.as_mut_ptr(); - std::ptr::write(dst, length); - dst = dst.add(1); - for item in iterator { - if let Some(item) = item { - null.push(true); - let s = item.as_ref(); - length += O::from_usize(s.len()).unwrap(); - values.extend_from_slice(s.as_bytes()); - } else { - null.push(false); - values.extend_from_slice(b""); - }; - - std::ptr::write(dst, length); - dst = dst.add(1); - } - assert_eq!( - dst.offset_from(offsets.as_ptr()) as usize, - len + 1, - "Trusted iterator length was not accurately reported" - ); - offsets.set_len(len + 1); - - (null.into(), offsets.into(), values.into()) -} - -/// Creates two [`Buffer`]s from an iterator of `&str`. -/// The first buffer corresponds to a offset buffer, the second to a values buffer. -/// # Safety -/// The caller must ensure that `iterator` is [`TrustedLen`]. -#[inline] -pub(crate) unsafe fn trusted_len_values_iter(iterator: I) -> (Buffer, Buffer) -where - O: Offset, - P: AsRef, - I: Iterator, -{ - let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); - - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); - - let mut length = O::default(); - let mut dst = offsets.as_mut_ptr(); - std::ptr::write(dst, length); - dst = dst.add(1); - for item in iterator { - let s = item.as_ref(); - length += O::from_usize(s.len()).unwrap(); - values.extend_from_slice(s.as_bytes()); - - std::ptr::write(dst, length); - dst = dst.add(1); + MutableUtf8Array::::try_from_trusted_len_iter_unchecked(iterator).map(|x| x.into()) } - assert_eq!( - dst.offset_from(offsets.as_ptr()) as usize, - len + 1, - "Trusted iterator length was not accurately reported" - ); - offsets.set_len(len + 1); - (offsets.into(), values.into()) -} - -/// Creates two [`Buffer`]s from an iterator of `&str`. -/// The first buffer corresponds to a offset buffer, the second to a values buffer. -#[inline] -fn values_iter(iterator: I) -> (Buffer, Buffer) -where - O: Offset, - P: AsRef, - I: Iterator, -{ - let (lower, _) = iterator.size_hint(); - - let mut offsets = MutableBuffer::::with_capacity(lower + 1); - let mut values = MutableBuffer::::new(); - - let mut length = O::default(); - offsets.push(length); - - for item in iterator { - let s = item.as_ref(); - length += O::from_usize(s.len()).unwrap(); - values.extend_from_slice(s.as_bytes()); - - offsets.push(length) - } - (offsets.into(), values.into()) -} - -/// # Safety -/// The caller must ensure that `iterator` is `TrustedLen`. -#[inline] -#[allow(clippy::type_complexity)] -pub(crate) unsafe fn try_trusted_len_unzip( - iterator: I, -) -> Result<(Option, Buffer, Buffer), E> -where - O: Offset, - P: AsRef, - I: Iterator, E>>, -{ - let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); - - let mut null = MutableBitmap::with_capacity(len); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); - - let mut length = O::default(); - let mut dst = offsets.as_mut_ptr(); - std::ptr::write(dst, length); - dst = dst.add(1); - for item in iterator { - if let Some(item) = item? { - null.push(true); - let s = item.as_ref(); - length += O::from_usize(s.len()).unwrap(); - values.extend_from_slice(s.as_bytes()); - } else { - null.push(false); - }; - std::ptr::write(dst, length); - dst = dst.add(1); + /// Creates a [`Utf8Array`] from an fallible iterator of trusted length. + #[inline] + pub fn try_from_trusted_len_iter(iter: I) -> Result + where + P: AsRef, + I: TrustedLen, E>>, + { + // soundness: I: TrustedLen + unsafe { Self::try_from_trusted_len_iter_unchecked(iter) } } - assert_eq!( - dst.offset_from(offsets.as_ptr()) as usize, - len + 1, - "Trusted iterator length was not accurately reported" - ); - offsets.set_len(len + 1); - - Ok((null.into(), offsets.into(), values.into())) } impl> FromIterator> for Utf8Array { diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index 4ba5038eb7f..1e3b27020c6 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -1,11 +1,15 @@ use std::{iter::FromIterator, sync::Arc}; use crate::{ - array::{Array, MutableArray, Offset, TryExtend}, + array::{ + specification::{check_offsets, check_offsets_and_utf8}, + Array, MutableArray, Offset, TryExtend, + }, bitmap::MutableBitmap, buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, + trusted_len::TrustedLen, }; use super::Utf8Array; @@ -46,6 +50,51 @@ impl MutableUtf8Array { } } + /// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs. + /// # Panics + /// This function panics iff: + /// * The `offsets` and `values` are inconsistent + /// * The `values` between `offsets` are not utf8 encoded + /// * The validity is not `None` and its length is different from `offsets`'s length minus one. + pub fn from_data( + offsets: MutableBuffer, + values: MutableBuffer, + validity: Option, + ) -> Self { + check_offsets_and_utf8(&offsets, &values); + if let Some(ref validity) = validity { + assert_eq!(offsets.len() - 1, validity.len()); + } + Self { + offsets, + values, + validity, + } + } + + /// Create a [`MutableUtf8Array`] out of low-end APIs. + /// # Safety + /// The caller must ensure that every value between offsets is a valid utf8. + /// # Panics + /// This function panics iff: + /// * The `offsets` and `values` are inconsistent + /// * The validity is not `None` and its length is different from `offsets`'s length minus one. + pub unsafe fn from_data_unchecked( + offsets: MutableBuffer, + values: MutableBuffer, + validity: Option, + ) -> Self { + check_offsets(&offsets, values.len()); + if let Some(ref validity) = validity { + assert_eq!(offsets.len() - 1, validity.len()); + } + Self { + offsets, + values, + validity, + } + } + /// Initializes a new [`MutableUtf8Array`] with a pre-allocated capacity of slots. pub fn with_capacity(capacity: usize) -> Self { Self::with_capacities(capacity, 0) @@ -185,6 +234,44 @@ impl> FromIterator> for MutableUtf8Array { } impl MutableUtf8Array { + /// Creates a [`MutableUtf8Array`] from an iterator of trusted length. + /// # Safety + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. + #[inline] + pub unsafe fn from_trusted_len_iter_unchecked(iterator: I) -> Self + where + P: AsRef, + I: Iterator>, + { + let (validity, offsets, values) = trusted_len_unzip(iterator); + + // soundness: P is `str` + Self::from_data_unchecked(offsets, values, validity) + } + + /// Creates a [`MutableUtf8Array`] from an iterator of trusted length. + #[inline] + pub fn from_trusted_len_iter(iterator: I) -> Self + where + P: AsRef, + I: TrustedLen>, + { + // soundness: I is `TrustedLen` + unsafe { Self::from_trusted_len_iter_unchecked(iterator) } + } + + /// Creates a new [`Utf8Array`] from a [`TrustedLen`] of `&str`. + #[inline] + pub fn from_trusted_len_values_iter, I: TrustedLen>( + iterator: I, + ) -> Self { + // soundness: I is `TrustedLen` + let (offsets, values) = unsafe { trusted_len_values_iter(iterator) }; + // soundness: T is AsRef + unsafe { Self::from_data_unchecked(offsets, values, None) } + } + /// Creates a new [`MutableUtf8Array`] from an iterator. /// # Error /// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value. @@ -198,6 +285,45 @@ impl MutableUtf8Array { } Ok(array) } + + /// Creates a [`MutableUtf8Array`] from an falible iterator of trusted length. + /// # Safety + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. + #[inline] + pub unsafe fn try_from_trusted_len_iter_unchecked( + iterator: I, + ) -> std::result::Result + where + P: AsRef, + I: IntoIterator, E>>, + { + let iterator = iterator.into_iter(); + + // soundness: assumed trusted len + let (validity, offsets, values) = try_trusted_len_unzip(iterator)?; + + // soundness: P is `str` + Ok(Self::from_data_unchecked(offsets, values, validity)) + } + + /// Creates a [`MutableUtf8Array`] from an falible iterator of trusted length. + #[inline] + pub fn try_from_trusted_len_iter(iterator: I) -> std::result::Result + where + P: AsRef, + I: TrustedLen, E>>, + { + // soundness: I: TrustedLen + unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } + } + + /// Creates a new [`MutableUtf8Array`] from a [`Iterator`] of `&str`. + pub fn from_iter_values, I: Iterator>(iterator: I) -> Self { + let (offsets, values) = values_iter(iterator); + // soundness: T: AsRef + unsafe { Self::from_data_unchecked(offsets, values, None) } + } } impl> Extend> for MutableUtf8Array { @@ -213,3 +339,177 @@ impl> TryExtend> for MutableUtf8Array { iter.try_for_each(|x| self.try_push(x)) } } + +/// Creates [`MutableBitmap`] and two [`MutableBuffer`]s from an iterator of `Option`. +/// The first buffer corresponds to a offset buffer, the second one +/// corresponds to a values buffer. +/// # Safety +/// The caller must ensure that `iterator` is `TrustedLen`. +#[inline] +unsafe fn trusted_len_unzip( + iterator: I, +) -> (Option, MutableBuffer, MutableBuffer) +where + O: Offset, + P: AsRef, + I: Iterator>, +{ + let (_, upper) = iterator.size_hint(); + let len = upper.expect("trusted_len_unzip requires an upper limit"); + + let mut validity = MutableBitmap::with_capacity(len); + let mut offsets = MutableBuffer::::with_capacity(len + 1); + let mut values = MutableBuffer::::new(); + + let mut length = O::default(); + let mut dst = offsets.as_mut_ptr(); + std::ptr::write(dst, length); + dst = dst.add(1); + for item in iterator { + if let Some(item) = item { + validity.push(true); + let s = item.as_ref(); + length += O::from_usize(s.len()).unwrap(); + values.extend_from_slice(s.as_bytes()); + } else { + validity.push(false); + values.extend_from_slice(b""); + }; + + std::ptr::write(dst, length); + dst = dst.add(1); + } + assert_eq!( + dst.offset_from(offsets.as_ptr()) as usize, + len + 1, + "Trusted iterator length was not accurately reported" + ); + offsets.set_len(len + 1); + + let validity = if validity.null_count() > 0 { + Some(validity) + } else { + None + }; + + (validity, offsets, values) +} + +/// # Safety +/// The caller must ensure that `iterator` is `TrustedLen`. +#[inline] +#[allow(clippy::type_complexity)] +pub(crate) unsafe fn try_trusted_len_unzip( + iterator: I, +) -> std::result::Result<(Option, MutableBuffer, MutableBuffer), E> +where + O: Offset, + P: AsRef, + I: Iterator, E>>, +{ + let (_, upper) = iterator.size_hint(); + let len = upper.expect("trusted_len_unzip requires an upper limit"); + + let mut validity = MutableBitmap::with_capacity(len); + let mut offsets = MutableBuffer::::with_capacity(len + 1); + let mut values = MutableBuffer::::new(); + + let mut length = O::default(); + let mut dst = offsets.as_mut_ptr(); + std::ptr::write(dst, length); + dst = dst.add(1); + for item in iterator { + if let Some(item) = item? { + validity.push(true); + let s = item.as_ref(); + length += O::from_usize(s.len()).unwrap(); + values.extend_from_slice(s.as_bytes()); + } else { + validity.push(false); + }; + std::ptr::write(dst, length); + dst = dst.add(1); + } + assert_eq!( + dst.offset_from(offsets.as_ptr()) as usize, + len + 1, + "Trusted iterator length was not accurately reported" + ); + offsets.set_len(len + 1); + + let validity = if validity.null_count() > 0 { + Some(validity) + } else { + None + }; + + Ok((validity, offsets, values)) +} + +/// Creates two [`Buffer`]s from an iterator of `&str`. +/// The first buffer corresponds to a offset buffer, the second to a values buffer. +/// # Safety +/// The caller must ensure that `iterator` is [`TrustedLen`]. +#[inline] +pub(crate) unsafe fn trusted_len_values_iter( + iterator: I, +) -> (MutableBuffer, MutableBuffer) +where + O: Offset, + P: AsRef, + I: Iterator, +{ + let (_, upper) = iterator.size_hint(); + let len = upper.expect("trusted_len_unzip requires an upper limit"); + + let mut offsets = MutableBuffer::::with_capacity(len + 1); + let mut values = MutableBuffer::::new(); + + let mut length = O::default(); + let mut dst = offsets.as_mut_ptr(); + std::ptr::write(dst, length); + dst = dst.add(1); + for item in iterator { + let s = item.as_ref(); + length += O::from_usize(s.len()).unwrap(); + values.extend_from_slice(s.as_bytes()); + + std::ptr::write(dst, length); + dst = dst.add(1); + } + assert_eq!( + dst.offset_from(offsets.as_ptr()) as usize, + len + 1, + "Trusted iterator length was not accurately reported" + ); + offsets.set_len(len + 1); + + (offsets, values) +} + +/// Creates two [`MutableBuffer`]s from an iterator of `&str`. +/// The first buffer corresponds to a offset buffer, the second to a values buffer. +#[inline] +fn values_iter(iterator: I) -> (MutableBuffer, MutableBuffer) +where + O: Offset, + P: AsRef, + I: Iterator, +{ + let (lower, _) = iterator.size_hint(); + + let mut offsets = MutableBuffer::::with_capacity(lower + 1); + let mut values = MutableBuffer::::new(); + + let mut length = O::default(); + offsets.push(length); + + for item in iterator { + let s = item.as_ref(); + length += O::from_usize(s.len()).unwrap(); + values.extend_from_slice(s.as_bytes()); + + offsets.push(length) + } + (offsets, values) +} diff --git a/tests/it/array/boolean/mutable.rs b/tests/it/array/boolean/mutable.rs index 4ea81b15ffd..07c4a7e54e7 100644 --- a/tests/it/array/boolean/mutable.rs +++ b/tests/it/array/boolean/mutable.rs @@ -1,4 +1,5 @@ use arrow2::array::{MutableArray, MutableBooleanArray}; +use arrow2::error::Result; #[test] fn set() { @@ -38,3 +39,10 @@ fn from_iter() { let a: MutableBooleanArray = iter.collect(); assert_eq!(a, MutableBooleanArray::from([Some(true), Some(true)])); } + +#[test] +fn try_from_trusted_len_iter() { + let iter = std::iter::repeat(Some(true)).take(2).map(Result::Ok); + let a = MutableBooleanArray::try_from_trusted_len_iter(iter).unwrap(); + assert_eq!(a, MutableBooleanArray::from([Some(true), Some(true)])); +} diff --git a/tests/it/array/mod.rs b/tests/it/array/mod.rs index 046e0a38079..63e63d01b25 100644 --- a/tests/it/array/mod.rs +++ b/tests/it/array/mod.rs @@ -27,6 +27,16 @@ fn nulls() { .into_iter() .all(|x| new_null_array(x, 10).null_count() == 10); assert!(a); + + // unions' null count is always 0 + let datatypes = vec![ + DataType::Union(vec![Field::new("a", DataType::Binary, true)], None, false), + DataType::Union(vec![Field::new("a", DataType::Binary, true)], None, true), + ]; + let a = datatypes + .into_iter() + .all(|x| new_null_array(x, 10).null_count() == 0); + assert!(a); } #[test] diff --git a/tests/it/array/primitive/mutable.rs b/tests/it/array/primitive/mutable.rs index 0adb4a534db..6fcda45b07a 100644 --- a/tests/it/array/primitive/mutable.rs +++ b/tests/it/array/primitive/mutable.rs @@ -3,6 +3,7 @@ use arrow2::{ bitmap::{Bitmap, MutableBitmap}, buffer::MutableBuffer, datatypes::DataType, + error::Result, }; use std::iter::FromIterator; @@ -140,3 +141,10 @@ fn set_values() { a.set_values(MutableBuffer::from([1, 3])); assert_eq!(a.values().as_slice(), [1, 3]); } + +#[test] +fn try_from_trusted_len_iter() { + let iter = std::iter::repeat(Some(1)).take(2).map(Result::Ok); + let a = MutablePrimitiveArray::try_from_trusted_len_iter(iter).unwrap(); + assert_eq!(a, MutablePrimitiveArray::from([Some(1), Some(1)])); +} diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs index ecc7f46fe5f..7e0dfa73a12 100644 --- a/tests/it/array/union.rs +++ b/tests/it/array/union.rs @@ -21,3 +21,27 @@ fn display() -> Result<()> { Ok(()) } + +#[test] +fn slice() -> Result<()> { + let fields = vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + ]; + let data_type = DataType::Union(fields, None, true); + let types = Buffer::from(&[0, 0, 1]); + let fields = vec![ + Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, + Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc, + ]; + + let array = UnionArray::from_data(data_type.clone(), types, fields.clone(), None); + + let result = array.slice(1, 2); + + let types = Buffer::from(&[0, 1]); + let expected = UnionArray::from_data(data_type, types, fields, None); + + assert_eq!(expected, result); + Ok(()) +} diff --git a/tests/it/array/utf8/mod.rs b/tests/it/array/utf8/mod.rs index 6e8ad20949f..c9cc823a740 100644 --- a/tests/it/array/utf8/mod.rs +++ b/tests/it/array/utf8/mod.rs @@ -1,4 +1,4 @@ -use arrow2::{array::*, bitmap::Bitmap, buffer::Buffer}; +use arrow2::{array::*, bitmap::Bitmap, buffer::Buffer, error::Result}; mod mutable; @@ -53,9 +53,42 @@ fn from() { assert_eq!(a, &Bitmap::from([true, true, false])); } +#[test] +fn from_slice() { + let b = Utf8Array::::from_slice(&["a", "b", "cc"]); + + let offsets = Buffer::from(&[0, 1, 2, 4]); + let values = Buffer::from("abcc".as_bytes()); + assert_eq!(b, Utf8Array::::from_data(offsets, values, None)); +} + #[test] fn from_iter_values() { - let b = Utf8Array::::from_iter_values(vec!["a", "b", "cc"]); + let b = Utf8Array::::from_iter_values(["a", "b", "cc"].iter()); + + let offsets = Buffer::from(&[0, 1, 2, 4]); + let values = Buffer::from("abcc".as_bytes()); + assert_eq!(b, Utf8Array::::from_data(offsets, values, None)); +} + +#[test] +fn from_trusted_len_iter() { + let b = + Utf8Array::::from_trusted_len_iter(vec![Some("a"), Some("b"), Some("cc")].into_iter()); + + let offsets = Buffer::from(&[0, 1, 2, 4]); + let values = Buffer::from("abcc".as_bytes()); + assert_eq!(b, Utf8Array::::from_data(offsets, values, None)); +} + +#[test] +fn try_from_trusted_len_iter() { + let b = Utf8Array::::try_from_trusted_len_iter( + vec![Some("a"), Some("b"), Some("cc")] + .into_iter() + .map(Result::Ok), + ) + .unwrap(); let offsets = Buffer::from(&[0, 1, 2, 4]); let values = Buffer::from("abcc".as_bytes());