From 4e711bdcd471e14e7ffa9289b657308a321320b1 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 17:42:31 +0530 Subject: [PATCH 1/6] Add extends method for `MutableBooleanArray` --- src/array/boolean/mutable.rs | 116 +++++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index a776b7a12bc..39a626f59cc 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -104,6 +104,69 @@ impl MutableBooleanArray { } } + /// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len. + /// This differs from `extend_trusted_len` which accepts in iterator of optional values. + #[inline] + pub fn extend_trusted_len_values(&mut self, iterator: I) + where + I: TrustedLen, + { + unsafe { self.extend_trusted_len_values_unchecked(iterator) } + } + + /// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len. + /// This differs from `extend_trusted_len_unchecked`, which accepts in iterator of optional values. + /// # Safety + /// The iterator must be trusted len. + #[inline] + pub unsafe fn extend_trusted_len_values_unchecked(&mut self, iterator: I) + where + I: Iterator, + { + let (_, upper) = iterator.size_hint(); + let additional = + upper.expect("extend_trusted_len_values_unchecked requires an upper limit"); + + if let Some(validity) = self.validity.as_mut() { + validity.extend_constant(additional, true); + } + + self.values.extend_from_trusted_len_iter_unchecked(iterator) + } + + /// Extends the [`MutableBooleanArray`] from an iterator of trusted len. + #[inline] + pub fn extend_trusted_len(&mut self, iterator: I) + where + P: std::borrow::Borrow, + I: TrustedLen>, + { + unsafe { self.extend_trusted_len_unchecked(iterator) } + } + + /// Extends the [`MutableBooleanArray`] from an iterator of trusted len. + /// # Safety + /// The iterator must be trusted len. + #[inline] + pub unsafe fn extend_trusted_len_unchecked(&mut self, iterator: I) + where + P: std::borrow::Borrow, + I: Iterator>, + { + if let Some(validity) = self.validity.as_mut() { + extend_trusted_len_unzip(iterator, validity, &mut self.values); + } else { + let mut validity = MutableBitmap::new(); + validity.extend_constant(self.len(), true); + + extend_trusted_len_unzip(iterator, &mut validity, &mut self.values); + + if validity.null_count() > 0 { + self.validity = Some(validity); + } + } + } + fn init_validity(&mut self) { let mut validity = MutableBitmap::new(); validity.extend_constant(self.len(), true); @@ -181,12 +244,6 @@ impl MutableBooleanArray { { let (validity, values) = trusted_len_unzip(iterator); - let validity = if validity.null_count() > 0 { - Some(validity) - } else { - None - }; - Self::from_data(DataType::Boolean, values, validity) } @@ -240,33 +297,52 @@ impl MutableBooleanArray { /// # Safety /// The caller must ensure that `iterator` is `TrustedLen`. #[inline] -pub(crate) unsafe fn trusted_len_unzip(iterator: I) -> (MutableBitmap, MutableBitmap) +pub(crate) unsafe fn trusted_len_unzip(iterator: I) -> (Option, MutableBitmap) where P: std::borrow::Borrow, I: Iterator>, +{ + let mut validity = MutableBitmap::new(); + let mut values = MutableBitmap::new(); + + extend_trusted_len_unzip(iterator, &mut validity, &mut values); + + let validity = if validity.null_count() > 0 { + Some(validity) + } else { + None + }; + + (validity, values) +} + +/// Extends validity [`MutableBitmap`] and values [`MutableBitmap`] from an iterator of `Option`. +/// # Safety +/// The caller must ensure that `iterator` is `TrustedLen`. +unsafe fn extend_trusted_len_unzip( + iterator: I, + validity: &mut MutableBitmap, + values: &mut MutableBitmap, +) where + P: std::borrow::Borrow, + I: Iterator>, { let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); + let additional = upper.expect("extend_trusted_len_unzip requires an upper limit"); - let mut validity = MutableBitmap::with_capacity(len); - let mut values = MutableBitmap::with_capacity(len); + validity.reserve(additional); + values.reserve(additional); for item in iterator { let item = if let Some(item) = item { - validity.push(true); + validity.push_unchecked(true); *item.borrow() } else { - validity.push(false); - false + validity.push_unchecked(false); + bool::default() }; - values.push(item); + values.push_unchecked(item); } - assert_eq!( - values.len(), - len, - "Trusted iterator length was not accurately reported" - ); - (validity, values) } /// # Safety From 6bcfb08cb908cf2850ff1452886d86bf8fb0b8d8 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 17:43:11 +0530 Subject: [PATCH 2/6] Add tests for `MutableBooleanArray` extend methods --- tests/it/array/boolean/mutable.rs | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/it/array/boolean/mutable.rs b/tests/it/array/boolean/mutable.rs index 6b4522b99b9..919d652ef50 100644 --- a/tests/it/array/boolean/mutable.rs +++ b/tests/it/array/boolean/mutable.rs @@ -64,3 +64,35 @@ fn reserve() { assert!(a.validity().unwrap().capacity() > 0); assert!(a.values().capacity() > 0) } + +#[test] +fn extend_trusted_len() { + let mut a = MutableBooleanArray::new(); + + a.extend_trusted_len(vec![Some(true), Some(false)].into_iter()); + assert_eq!(a.validity(), None); + + a.extend_trusted_len(vec![None, Some(true)].into_iter()); + assert_eq!( + a.validity(), + Some(&MutableBitmap::from([true, true, false, true])) + ); + assert_eq!(a.values(), &MutableBitmap::from([true, false, false, true])); +} + +#[test] +fn extend_trusted_len_values() { + let mut a = MutableBooleanArray::new(); + + a.extend_trusted_len_values(vec![true, true, false].into_iter()); + assert_eq!(a.validity(), None); + assert_eq!(a.values(), &MutableBitmap::from([true, true, false])); + + let mut a = MutableBooleanArray::new(); + a.push(None); + a.extend_trusted_len_values(vec![true, false].into_iter()); + assert_eq!( + a.validity(), + Some(&MutableBitmap::from([false, true, true])) + ); +} From d200231ae56b5364f1c6817d644a592fce4ace51 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 18:10:16 +0530 Subject: [PATCH 3/6] Make `extend_trusted_len_unzip` `pub(crate)` --- src/array/boolean/mutable.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index 39a626f59cc..9e6956a80f7 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -319,7 +319,8 @@ where /// Extends validity [`MutableBitmap`] and values [`MutableBitmap`] from an iterator of `Option`. /// # Safety /// The caller must ensure that `iterator` is `TrustedLen`. -unsafe fn extend_trusted_len_unzip( +#[inline] +pub(crate) unsafe fn extend_trusted_len_unzip( iterator: I, validity: &mut MutableBitmap, values: &mut MutableBitmap, From 0412e5e9e34701eb49eb53602469ea1b3be93aef Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 19:26:28 +0530 Subject: [PATCH 4/6] Add safety comments for unsafe code --- src/array/boolean/mutable.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index 9e6956a80f7..67b0fc359a7 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -111,6 +111,7 @@ impl MutableBooleanArray { where I: TrustedLen, { + // Safety: `I` is `TrustedLen` unsafe { self.extend_trusted_len_values_unchecked(iterator) } } @@ -141,6 +142,7 @@ impl MutableBooleanArray { P: std::borrow::Borrow, I: TrustedLen>, { + // Safety: `I` is `TrustedLen` unsafe { self.extend_trusted_len_unchecked(iterator) } } @@ -254,6 +256,7 @@ impl MutableBooleanArray { P: std::borrow::Borrow, I: TrustedLen>, { + // Safety: `I` is `TrustedLen` unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } @@ -287,6 +290,7 @@ impl MutableBooleanArray { P: std::borrow::Borrow, I: TrustedLen, E>>, { + // Safety: `I` is `TrustedLen` unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } } } From 5d06bf0e1eacf53db2549747b5ae4e8a2242d945 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 19:41:27 +0530 Subject: [PATCH 5/6] Add missing unittest assertion --- tests/it/array/boolean/mutable.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/it/array/boolean/mutable.rs b/tests/it/array/boolean/mutable.rs index 919d652ef50..e56ccadb72c 100644 --- a/tests/it/array/boolean/mutable.rs +++ b/tests/it/array/boolean/mutable.rs @@ -95,4 +95,5 @@ fn extend_trusted_len_values() { a.validity(), Some(&MutableBitmap::from([false, true, true])) ); + assert_eq!(a.values(), &MutableBitmap::from([false, true, false])); } From cf1adcb58e5b328a50cab66c7853e0f86c9e525a Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 30 Sep 2021 19:45:10 +0530 Subject: [PATCH 6/6] Add debug_assert statement for `extend_trusted_len_unzip` --- src/array/boolean/mutable.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index 67b0fc359a7..b27e7b2bb2d 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -335,6 +335,10 @@ pub(crate) unsafe fn extend_trusted_len_unzip( let (_, upper) = iterator.size_hint(); let additional = upper.expect("extend_trusted_len_unzip requires an upper limit"); + // Length of the array before new values are pushed, + // variable created for assertion post operation + let pre_length = values.len(); + validity.reserve(additional); values.reserve(additional); @@ -348,6 +352,12 @@ pub(crate) unsafe fn extend_trusted_len_unzip( }; values.push_unchecked(item); } + + debug_assert_eq!( + values.len(), + pre_length + additional, + "Trusted iterator length was not accurately reported" + ); } /// # Safety