From 6d55d31bbea210293a0570b747cbdfe9d19a5f9c Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 6 Sep 2021 15:18:21 +0000 Subject: [PATCH 1/3] Fixed error in pushing bit. --- src/array/primitive/mutable.rs | 10 +++++----- src/bitmap/mutable.rs | 6 ++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index ee330188126..f9b2f572a70 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -164,8 +164,8 @@ impl MutablePrimitiveArray { if let Some(validity) = self.validity.as_mut() { extend_trusted_len_unzip(iterator, validity, &mut self.values) } else { - let mut validity = - MutableBitmap::from_trusted_len_iter(std::iter::repeat(true).take(self.len())); + let mut validity = MutableBitmap::new(); + validity.extend_constant(self.len(), true); extend_trusted_len_unzip(iterator, &mut validity, &mut self.values); if validity.null_count() > 0 { self.validity = Some(validity); @@ -449,10 +449,10 @@ pub(crate) unsafe fn extend_trusted_len_unzip( I: Iterator>, { let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); + let additional = upper.expect("trusted_len_unzip requires an upper limit"); - validity.reserve(len); - buffer.reserve(len); + validity.reserve(additional); + buffer.reserve(additional); for item in iterator { let item = if let Some(item) = item { diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index d3eae6689e5..4826ef7872c 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -98,10 +98,8 @@ impl MutableBitmap { if self.length % 8 == 0 { self.buffer.push_unchecked(0); } - if value { - let byte = self.buffer.as_mut_slice().last_mut().unwrap(); - *byte = set(*byte, self.length % 8, true); - }; + let byte = self.buffer.as_mut_slice().last_mut().unwrap(); + *byte = set(*byte, self.length % 8, value); self.length += 1; } From d5dcffd5dda709495e235fd287adcc559b9288d9 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 6 Sep 2021 12:59:21 +0200 Subject: [PATCH 2/3] improve MutablePrimitiveArray adds * extend_trusted_len_values * extend_trusted_len_values_unchecked * extend_from_slice --- src/array/primitive/mutable.rs | 46 +++++++++++++++++++++++++++++ src/compute/like.rs | 5 +++- tests/it/array/primitive/mutable.rs | 32 ++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index f9b2f572a70..a90ecde8eb6 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -172,6 +172,52 @@ impl MutablePrimitiveArray { } } } + /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len. + /// This differs from `extend_trusted_len` which accepts in iterator of optional values. + #[inline] + pub fn extend_trusted_len_values(&mut self, iterator: I) + where + I: TrustedLen, + { + unsafe { self.extend_trusted_len_values_unchecked(iterator) } + } + + /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len. + /// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values. + /// # Safety + /// The iterator must be trusted len. + #[inline] + pub unsafe fn extend_trusted_len_values_unchecked(&mut self, iterator: I) + where + I: Iterator, + { + let size = iterator + .size_hint() + .1 + .expect("upper bound should be set by trusted len iter"); + self.values.extend_from_trusted_len_iter_unchecked(iterator); + self.update_all_valid(size); + } + + #[inline] + /// Extends the [`MutablePrimitiveArray`] from a slice + pub fn extend_from_slice(&mut self, items: &[T]) { + self.values.extend_from_slice(items); + self.update_all_valid(items.len()); + } + + fn update_all_valid(&mut self, additional: usize) { + if let Some(validity) = self.validity.as_mut() { + validity.extend_constant(additional, true); + } else { + let validity = MutableBitmap::from_trusted_len_iter( + std::iter::repeat(true).take(self.len() + additional), + ); + if validity.null_count() > 0 { + self.validity = Some(validity); + } + } + } fn init_validity(&mut self) { let mut validity = MutableBitmap::new(); diff --git a/src/compute/like.rs b/src/compute/like.rs index bcd2ab71a96..1160721cef7 100644 --- a/src/compute/like.rs +++ b/src/compute/like.rs @@ -232,7 +232,10 @@ fn a_like_binary_scalar bool>( ) -> Result { let validity = lhs.validity(); let pattern = std::str::from_utf8(rhs).map_err(|e| { - ArrowError::InvalidArgumentError(format!("Unable to convert the LIKE pattern to string: {}", e)) + ArrowError::InvalidArgumentError(format!( + "Unable to convert the LIKE pattern to string: {}", + e + )) })?; let values = if !pattern.contains(is_like_pattern) { diff --git a/tests/it/array/primitive/mutable.rs b/tests/it/array/primitive/mutable.rs index f5f4cd381d7..ecc1fd8f994 100644 --- a/tests/it/array/primitive/mutable.rs +++ b/tests/it/array/primitive/mutable.rs @@ -126,6 +126,38 @@ fn extend_trusted_len() { assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 0, 4])); } +#[test] +fn extend_trusted_len_values() { + let mut a = MutablePrimitiveArray::::new(); + a.extend_trusted_len_values(vec![1, 2, 3].into_iter()); + assert_eq!(a.validity(), &None); + assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 3])); + + let mut a = MutablePrimitiveArray::::new(); + a.push(None); + a.extend_trusted_len_values(vec![1, 2].into_iter()); + assert_eq!( + a.validity(), + &Some(MutableBitmap::from([false, true, true])) + ); +} + +#[test] +fn extend_from_slice() { + let mut a = MutablePrimitiveArray::::new(); + a.extend_from_slice(&[1, 2, 3]); + assert_eq!(a.validity(), &None); + assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 3])); + + let mut a = MutablePrimitiveArray::::new(); + a.push(None); + a.extend_from_slice(&[1, 2]); + assert_eq!( + a.validity(), + &Some(MutableBitmap::from([false, true, true])) + ); +} + #[test] fn set_validity() { let mut a = MutablePrimitiveArray::::new(); From ddba3883012f008f655fc79c959c61300e3b9b5c Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 6 Sep 2021 14:58:07 +0200 Subject: [PATCH 3/3] process comments and apply them to 'extend_trusted_len' as well --- src/array/primitive/mutable.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index a90ecde8eb6..64d8c245abb 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -191,28 +191,25 @@ impl MutablePrimitiveArray { where I: Iterator, { - let size = iterator - .size_hint() - .1 - .expect("upper bound should be set by trusted len iter"); self.values.extend_from_trusted_len_iter_unchecked(iterator); - self.update_all_valid(size); + self.update_all_valid(); } #[inline] /// Extends the [`MutablePrimitiveArray`] from a slice pub fn extend_from_slice(&mut self, items: &[T]) { self.values.extend_from_slice(items); - self.update_all_valid(items.len()); + self.update_all_valid(); } - fn update_all_valid(&mut self, additional: usize) { + fn update_all_valid(&mut self) { + // get len before mutable borrow + let len = self.len(); if let Some(validity) = self.validity.as_mut() { - validity.extend_constant(additional, true); + validity.extend_constant(len - validity.len(), true); } else { - let validity = MutableBitmap::from_trusted_len_iter( - std::iter::repeat(true).take(self.len() + additional), - ); + let mut validity = MutableBitmap::new(); + validity.extend_constant(len, true); if validity.null_count() > 0 { self.validity = Some(validity); }