Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved performance of PrimitiveGrowable for nulls (-10%) #434

Merged
merged 1 commit into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/array/growable/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ pub struct GrowableBinary<'a, O: Offset> {
values: MutableBuffer<u8>,
offsets: MutableBuffer<O>,
length: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableBinary<'a, O> {
/// Creates a new [`GrowableBinary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a BinaryArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ pub struct GrowableBoolean<'a> {
data_type: DataType,
validity: MutableBitmap,
values: MutableBitmap,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a> GrowableBoolean<'a> {
/// Creates a new [`GrowableBoolean`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a BooleanArray>, mut use_validity: bool, capacity: usize) -> Self {
let data_type = arrays[0].data_type().clone();

Expand Down
39 changes: 20 additions & 19 deletions src/array/growable/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,27 @@ use std::sync::Arc;

use crate::{
array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
};

use super::{make_growable, utils::extend_validity, Growable};
use super::{
make_growable,
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`DictionaryArray`].
#[derive(Debug)]
/// # Implementation
/// This growable does not perform collision checks and instead concatenates
/// the values of each [`DictionaryArray`] one after the other.
pub struct GrowableDictionary<'a, K: DictionaryKey> {
keys_values: Vec<&'a [K]>,
keys_validities: Vec<&'a Option<Bitmap>>,
key_values: MutableBuffer<K>,
key_validity: MutableBitmap,
use_validity: bool,
offsets: Vec<usize>,
values: Arc<dyn Array>,
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

fn concatenate_values<K: DictionaryKey>(
Expand All @@ -36,6 +41,9 @@ fn concatenate_values<K: DictionaryKey>(
}

impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
/// Creates a new [`GrowableDictionary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: &[&'a DictionaryArray<T>], mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array requires setting bits
// as there is at least one array with nulls.
Expand All @@ -48,10 +56,11 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
.iter()
.map(|array| array.values().as_slice())
.collect::<Vec<_>>();
let keys_validities = arrays_keys

let extend_null_bits = arrays
.iter()
.map(|array| array.validity())
.collect::<Vec<_>>();
.map(|array| build_extend_null_bits(array.keys(), use_validity))
.collect();

let arrays_values = arrays
.iter()
Expand All @@ -63,11 +72,10 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
Self {
offsets,
values,
use_validity,
keys_values,
keys_validities,
key_values: MutableBuffer::with_capacity(capacity),
key_validity: MutableBitmap::with_capacity(capacity),
extend_null_bits,
}
}

Expand All @@ -85,13 +93,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {
#[inline]
fn extend(&mut self, index: usize, start: usize, len: usize) {
extend_validity(
&mut self.key_validity,
self.keys_validities[index],
start,
len,
self.use_validity,
);
(self.extend_null_bits[index])(&mut self.key_validity, start, len);

let values = &self.keys_values[index][start..start + len];
let offset = self.offsets[index];
Expand All @@ -104,8 +106,7 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {

#[inline]
fn extend_validity(&mut self, additional: usize) {
self.key_values
.resize(self.key_values.len() + additional, T::default());
self.key_values.extend_constant(additional, T::default());
self.key_validity.extend_constant(additional, false);
}

Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ pub struct GrowableFixedSizeBinary<'a> {
arrays: Vec<&'a FixedSizeBinaryArray>,
validity: MutableBitmap,
values: MutableBuffer<u8>,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
size: usize, // just a cache
}

impl<'a> GrowableFixedSizeBinary<'a> {
/// Creates a new [`GrowableFixedSizeBinary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(
arrays: Vec<&'a FixedSizeBinaryArray>,
mut use_validity: bool,
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ pub struct GrowableList<'a, O: Offset> {
values: Box<dyn Growable<'a> + 'a>,
offsets: MutableBuffer<O>,
last_offset: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableList<'a, O> {
/// Creates a new [`GrowableList`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a ListArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array requires setting bits
// as there is at least one array with nulls.
Expand Down
16 changes: 8 additions & 8 deletions src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ pub use dictionary::GrowableDictionary;

mod utils;

/// A trait describing a struct that can be extended from slices of pre-existing [`Array`]s.
/// This is used in operations where a new array is built out of other arrays such,
/// as filtering and concatenation.
/// Describes a struct that can be extended from slices of other pre-existing [`Array`]s.
/// This is used in operations where a new array is built out of other arrays such
/// as filter and concatenation.
pub trait Growable<'a> {
/// Extends this [`Growable`] with elements from the bounded [`Array`] at index `index` from
/// a slice starting at `start` and length `len`.
Expand All @@ -38,13 +38,13 @@ pub trait Growable<'a> {
/// Extends this [`Growable`] with null elements, disregarding the bound arrays
fn extend_validity(&mut self, additional: usize);

/// Converts itself to an `Arc<dyn Array>`, thereby finishing the mutation.
/// Self will be empty after such operation
/// Converts this [`Growable`] to an [`Arc<dyn Array>`], thereby finishing the mutation.
/// Self will be empty after such operation.
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
self.as_box().into()
}

/// Converts itself to an `Box<dyn Array>`, thereby finishing the mutation.
/// Converts this [`Growable`] to a [`Box<dyn Array>`], thereby finishing the mutation.
/// Self will be empty after such operation.
fn as_box(&mut self) -> Box<dyn Array>;
}
Expand Down Expand Up @@ -82,11 +82,11 @@ macro_rules! dyn_dict_growable {
}};
}

/// Creates a new [`Growable`] from an arbitrary number of dynamic [`Array`]s.
/// Creates a new [`Growable`] from an arbitrary number of [`Array`]s.
/// # Panics
/// This function panics iff
/// * the arrays do not have the same [`DataType`].
/// * `arrays.is_empty`.
/// * `arrays.is_empty()`.
pub fn make_growable<'a>(
arrays: &[&'a dyn Array],
use_validity: bool,
Expand Down
1 change: 1 addition & 0 deletions src/array/growable/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ impl Default for GrowableNull {
}

impl GrowableNull {
/// Creates a new [`GrowableNull`].
pub fn new(data_type: DataType) -> Self {
Self {
data_type,
Expand Down
27 changes: 16 additions & 11 deletions src/array/growable/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use std::sync::Arc;

use crate::{
array::{Array, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
datatypes::DataType,
types::NativeType,
};

use super::{utils::extend_validity, Growable};
use super::{
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`PrimitiveArray`].
pub struct GrowablePrimitive<'a, T: NativeType> {
data_type: DataType,
arrays: Vec<&'a [T]>,
validities: Vec<&'a Option<Bitmap>>,
use_validity: bool,
validity: MutableBitmap,
values: MutableBuffer<T>,
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
/// Creates a new [`GrowablePrimitive`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(
arrays: Vec<&'a PrimitiveArray<T>>,
mut use_validity: bool,
Expand All @@ -33,10 +38,12 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
};

let data_type = arrays[0].data_type().clone();
let validities = arrays

let extend_null_bits = arrays
.iter()
.map(|array| array.validity())
.collect::<Vec<_>>();
.map(|array| build_extend_null_bits(*array, use_validity))
.collect();

let arrays = arrays
.iter()
.map(|array| array.values().as_slice())
Expand All @@ -45,10 +52,9 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
Self {
data_type,
arrays,
validities,
use_validity,
values: MutableBuffer::with_capacity(capacity),
validity: MutableBitmap::with_capacity(capacity),
extend_null_bits,
}
}

Expand All @@ -64,8 +70,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
#[inline]
fn extend(&mut self, index: usize, start: usize, len: usize) {
let validity = self.validities[index];
extend_validity(&mut self.validity, validity, start, len, self.use_validity);
(self.extend_null_bits[index])(&mut self.validity, start, len);

let values = self.arrays[index];
self.values.extend_from_slice(&values[start..start + len]);
Expand Down
5 changes: 2 additions & 3 deletions src/array/growable/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ pub struct GrowableStruct<'a> {
arrays: Vec<&'a StructArray>,
validity: MutableBitmap,
values: Vec<Box<dyn Growable<'a> + 'a>>,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a> GrowableStruct<'a> {
/// Creates a new [`GrowableStruct`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// This function panics if any of the `arrays` is not downcastable to `PrimitiveArray<T>`.
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a StructArray>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array requires setting bits
// as there is at least one array with nulls.
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ pub struct GrowableUtf8<'a, O: Offset> {
values: MutableBuffer<u8>,
offsets: MutableBuffer<O>,
length: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableUtf8<'a, O> {
/// Creates a new [`GrowableUtf8`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a Utf8Array<O>>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array requires setting bits
// as there is at least one array with nulls.
Expand Down
21 changes: 3 additions & 18 deletions src/array/growable/utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
array::{Array, Offset},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
};

Expand All @@ -18,6 +18,8 @@ pub(super) fn extend_offsets<T: Offset>(
});
}

// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
pub(super) type ExtendNullBits<'a> = Box<dyn Fn(&mut MutableBitmap, usize, usize) + 'a>;

pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> ExtendNullBits {
Expand All @@ -36,23 +38,6 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E
}
}

#[inline]
pub(super) fn extend_validity(
mutable_validity: &mut MutableBitmap,
validity: &Option<Bitmap>,
start: usize,
len: usize,
use_validity: bool,
) {
if let Some(bitmap) = validity {
assert!(start + len <= bitmap.len());
let (slice, offset, _) = bitmap.as_slice();
mutable_validity.extend_from_slice(slice, start + offset, len);
} else if use_validity {
mutable_validity.extend_constant(len, true);
};
}

#[inline]
pub(super) fn extend_offset_values<O: Offset>(
buffer: &mut MutableBuffer<u8>,
Expand Down