Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
delay null_counts (#994)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored May 19, 2022
1 parent aafba7b commit 267cac0
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 32 deletions.
38 changes: 14 additions & 24 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{iter::FromIterator, sync::Arc};

use crate::bitmap::Bitmap;
use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
Expand All @@ -13,7 +14,7 @@ use super::PrimitiveArray;

/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutablePrimitiveArray<T: NativeType> {
data_type: DataType,
values: Vec<T>,
Expand All @@ -22,11 +23,14 @@ pub struct MutablePrimitiveArray<T: NativeType> {

impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
fn from(other: MutablePrimitiveArray<T>) -> Self {
let validity = if other.validity.as_ref().map(|x| x.null_count()).unwrap_or(0) > 0 {
other.validity.map(|x| x.into())
} else {
None
};
let validity = other.validity.and_then(|x| {
let bitmap: Bitmap = x.into();
if bitmap.null_count() == 0 {
None
} else {
Some(bitmap)
}
});

PrimitiveArray::<T>::new(other.data_type, other.values.into(), validity)
}
Expand Down Expand Up @@ -194,9 +198,7 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
let mut validity = MutableBitmap::new();
validity.extend_constant(self.len(), true);
extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
if validity.null_count() > 0 {
self.validity = Some(validity);
}
self.validity = Some(validity);
}
}
/// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
Expand Down Expand Up @@ -526,11 +528,7 @@ impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>
})
.collect();

let validity = if validity.null_count() > 0 {
Some(validity)
} else {
None
};
let validity = Some(validity);

Self {
data_type: T::PRIMITIVE.into(),
Expand Down Expand Up @@ -588,11 +586,7 @@ where

extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);

let validity = if validity.null_count() > 0 {
Some(validity)
} else {
None
};
let validity = Some(validity);

(validity, buffer)
}
Expand Down Expand Up @@ -634,11 +628,7 @@ where
buffer.set_len(len);
null.set_len(len);

let validity = if null.null_count() > 0 {
Some(null)
} else {
None
};
let validity = Some(null);

Ok((validity, buffer))
}
Expand Down
16 changes: 11 additions & 5 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{

use super::Utf8Array;
use crate::array::physical_binary::*;
use crate::bitmap::Bitmap;

struct StrAsBytes<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {
Expand All @@ -36,12 +37,21 @@ impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O> {
// Safety:
// `MutableUtf8Array` has the same invariants as `Utf8Array` and thus
// `Utf8Array` can be safely created from `MutableUtf8Array` without checks.
let validity = other.validity.and_then(|x| {
let bitmap: Bitmap = x.into();
if bitmap.null_count() == 0 {
None
} else {
Some(bitmap)
}
});

unsafe {
Utf8Array::<O>::from_data_unchecked(
other.data_type,
other.offsets.into(),
other.values.into(),
other.validity.map(|x| x.into()),
validity,
)
}
}
Expand Down Expand Up @@ -368,10 +378,6 @@ impl<O: Offset> MutableUtf8Array<O> {
self.validity.as_mut().unwrap(),
iterator,
);

if self.validity.as_mut().unwrap().null_count() == 0 {
self.validity = None;
}
}

/// Creates a [`MutableUtf8Array`] from an iterator of trusted length.
Expand Down
1 change: 1 addition & 0 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use super::Bitmap;
/// The main difference against [`Vec<bool>`] is that a bitmap cannot be represented as `&[bool]`.
/// # Implementation
/// This container is backed by [`Vec<u8>`].
#[derive(Clone)]
pub struct MutableBitmap {
buffer: Vec<u8>,
// invariant: length.saturating_add(7) / 8 == buffer.len();
Expand Down
15 changes: 12 additions & 3 deletions tests/it/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ fn set() {
fn from_iter() {
let a = MutablePrimitiveArray::<i32>::from_iter((0..2).map(Some));
assert_eq!(a.len(), 2);
assert_eq!(a.validity(), None);
let validity = a.validity().unwrap();
assert_eq!(validity.null_count(), 0);
}

#[test]
Expand Down Expand Up @@ -176,7 +177,8 @@ fn from_trusted_len() {
fn extend_trusted_len() {
let mut a = MutablePrimitiveArray::<i32>::new();
a.extend_trusted_len(vec![Some(1), Some(2)].into_iter());
assert_eq!(a.validity(), None);
let validity = a.validity().unwrap();
assert_eq!(validity.null_count(), 0);
a.extend_trusted_len(vec![None, Some(4)].into_iter());
assert_eq!(
a.validity(),
Expand Down Expand Up @@ -266,7 +268,14 @@ fn extend_from_slice() {
fn set_validity() {
let mut a = MutablePrimitiveArray::<i32>::new();
a.extend_trusted_len(vec![Some(1), Some(2)].into_iter());
assert_eq!(a.validity(), None);
let validity = a.validity().unwrap();
assert_eq!(validity.null_count(), 0);

// test that upon conversion to array the bitmap is set to None
let arr: PrimitiveArray<_> = a.clone().into();
assert_eq!(arr.validity(), None);

// test set_validity
a.set_validity(Some(MutableBitmap::from([false, true])));
assert_eq!(a.validity(), Some(&MutableBitmap::from([false, true])));
}
Expand Down

0 comments on commit 267cac0

Please sign in to comment.