Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved growable. (#434)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 22, 2021
1 parent 4c2f4dc commit cdbc958
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 69 deletions.
3 changes: 1 addition & 2 deletions src/array/growable/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ pub struct GrowableBinary<'a, O: Offset> {
values: MutableBuffer<u8>,
offsets: MutableBuffer<O>,
length: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableBinary<'a, O> {
/// Creates a new [`GrowableBinary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a BinaryArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ pub struct GrowableBoolean<'a> {
data_type: DataType,
validity: MutableBitmap,
values: MutableBitmap,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a> GrowableBoolean<'a> {
/// Creates a new [`GrowableBoolean`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a BooleanArray>, mut use_validity: bool, capacity: usize) -> Self {
let data_type = arrays[0].data_type().clone();

Expand Down
39 changes: 20 additions & 19 deletions src/array/growable/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,27 @@ use std::sync::Arc;

use crate::{
array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
};

use super::{make_growable, utils::extend_validity, Growable};
use super::{
make_growable,
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`DictionaryArray`].
#[derive(Debug)]
/// # Implementation
/// This growable does not perform collision checks and instead concatenates
/// the values of each [`DictionaryArray`] one after the other.
pub struct GrowableDictionary<'a, K: DictionaryKey> {
keys_values: Vec<&'a [K]>,
keys_validities: Vec<&'a Option<Bitmap>>,
key_values: MutableBuffer<K>,
key_validity: MutableBitmap,
use_validity: bool,
offsets: Vec<usize>,
values: Arc<dyn Array>,
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

fn concatenate_values<K: DictionaryKey>(
Expand All @@ -36,6 +41,9 @@ fn concatenate_values<K: DictionaryKey>(
}

impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
/// Creates a new [`GrowableDictionary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: &[&'a DictionaryArray<T>], mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array require setting bits
// as there is at least one array with nulls.
Expand All @@ -48,10 +56,11 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
.iter()
.map(|array| array.values().as_slice())
.collect::<Vec<_>>();
let keys_validities = arrays_keys

let extend_null_bits = arrays
.iter()
.map(|array| array.validity())
.collect::<Vec<_>>();
.map(|array| build_extend_null_bits(array.keys(), use_validity))
.collect();

let arrays_values = arrays
.iter()
Expand All @@ -63,11 +72,10 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
Self {
offsets,
values,
use_validity,
keys_values,
keys_validities,
key_values: MutableBuffer::with_capacity(capacity),
key_validity: MutableBitmap::with_capacity(capacity),
extend_null_bits,
}
}

Expand All @@ -85,13 +93,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {
#[inline]
fn extend(&mut self, index: usize, start: usize, len: usize) {
extend_validity(
&mut self.key_validity,
self.keys_validities[index],
start,
len,
self.use_validity,
);
(self.extend_null_bits[index])(&mut self.key_validity, start, len);

let values = &self.keys_values[index][start..start + len];
let offset = self.offsets[index];
Expand All @@ -104,8 +106,7 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {

#[inline]
fn extend_validity(&mut self, additional: usize) {
self.key_values
.resize(self.key_values.len() + additional, T::default());
self.key_values.extend_constant(additional, T::default());
self.key_validity.extend_constant(additional, false);
}

Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ pub struct GrowableFixedSizeBinary<'a> {
arrays: Vec<&'a FixedSizeBinaryArray>,
validity: MutableBitmap,
values: MutableBuffer<u8>,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
size: usize, // just a cache
}

impl<'a> GrowableFixedSizeBinary<'a> {
/// Creates a new [`GrowableFixedSizeBinary`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(
arrays: Vec<&'a FixedSizeBinaryArray>,
mut use_validity: bool,
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ pub struct GrowableList<'a, O: Offset> {
values: Box<dyn Growable<'a> + 'a>,
offsets: MutableBuffer<O>,
last_offset: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableList<'a, O> {
/// Creates a new [`GrowableList`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a ListArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array require setting bits
// as there is at least one array with nulls.
Expand Down
16 changes: 8 additions & 8 deletions src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ pub use dictionary::GrowableDictionary;

mod utils;

/// A trait describing a struct that can be extended from slices of pre-existing [`Array`]s.
/// This is used in operations where a new array is built out of other arrays such,
/// as filtering and concatenation.
/// Describes a struct that can be extended from slices of other pre-existing [`Array`]s.
/// This is used in operations where a new array is built out of other arrays such
/// as filter and concatenation.
pub trait Growable<'a> {
/// Extends this [`Growable`] with elements from the bounded [`Array`] at index `index` from
/// a slice starting at `start` and length `len`.
Expand All @@ -38,13 +38,13 @@ pub trait Growable<'a> {
/// Extends this [`Growable`] with null elements, disregarding the bound arrays
fn extend_validity(&mut self, additional: usize);

/// Converts itself to an `Arc<dyn Array>`, thereby finishing the mutation.
/// Self will be empty after such operation
/// Converts this [`Growable`] to an [`Arc<dyn Array>`], thereby finishing the mutation.
/// Self will be empty after such operation.
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
self.as_box().into()
}

/// Converts itself to an `Box<dyn Array>`, thereby finishing the mutation.
/// Converts this [`Growable`] to a [`Box<dyn Array>`], thereby finishing the mutation.
/// Self will be empty after such operation.
fn as_box(&mut self) -> Box<dyn Array>;
}
Expand Down Expand Up @@ -82,11 +82,11 @@ macro_rules! dyn_dict_growable {
}};
}

/// Creates a new [`Growable`] from an arbitrary number of dynamic [`Array`]s.
/// Creates a new [`Growable`] from an arbitrary number of [`Array`]s.
/// # Panics
/// This function panics iff
/// * the arrays do not have the same [`DataType`].
/// * `arrays.is_empty`.
/// * `arrays.is_empty()`.
pub fn make_growable<'a>(
arrays: &[&'a dyn Array],
use_validity: bool,
Expand Down
1 change: 1 addition & 0 deletions src/array/growable/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ impl Default for GrowableNull {
}

impl GrowableNull {
/// Creates a new [`GrowableNull`].
pub fn new(data_type: DataType) -> Self {
Self {
data_type,
Expand Down
27 changes: 16 additions & 11 deletions src/array/growable/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use std::sync::Arc;

use crate::{
array::{Array, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
datatypes::DataType,
types::NativeType,
};

use super::{utils::extend_validity, Growable};
use super::{
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`PrimitiveArray`].
pub struct GrowablePrimitive<'a, T: NativeType> {
data_type: DataType,
arrays: Vec<&'a [T]>,
validities: Vec<&'a Option<Bitmap>>,
use_validity: bool,
validity: MutableBitmap,
values: MutableBuffer<T>,
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
/// Creates a new [`GrowablePrimitive`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(
arrays: Vec<&'a PrimitiveArray<T>>,
mut use_validity: bool,
Expand All @@ -33,10 +38,12 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
};

let data_type = arrays[0].data_type().clone();
let validities = arrays

let extend_null_bits = arrays
.iter()
.map(|array| array.validity())
.collect::<Vec<_>>();
.map(|array| build_extend_null_bits(*array, use_validity))
.collect();

let arrays = arrays
.iter()
.map(|array| array.values().as_slice())
Expand All @@ -45,10 +52,9 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
Self {
data_type,
arrays,
validities,
use_validity,
values: MutableBuffer::with_capacity(capacity),
validity: MutableBitmap::with_capacity(capacity),
extend_null_bits,
}
}

Expand All @@ -64,8 +70,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
#[inline]
fn extend(&mut self, index: usize, start: usize, len: usize) {
let validity = self.validities[index];
extend_validity(&mut self.validity, validity, start, len, self.use_validity);
(self.extend_null_bits[index])(&mut self.validity, start, len);

let values = self.arrays[index];
self.values.extend_from_slice(&values[start..start + len]);
Expand Down
5 changes: 2 additions & 3 deletions src/array/growable/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ pub struct GrowableStruct<'a> {
arrays: Vec<&'a StructArray>,
validity: MutableBitmap,
values: Vec<Box<dyn Growable<'a> + 'a>>,
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a> GrowableStruct<'a> {
/// Creates a new [`GrowableStruct`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// This function panics if any of the `arrays` is not downcastable to `PrimitiveArray<T>`.
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a StructArray>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array require setting bits
// as there is at least one array with nulls.
Expand Down
5 changes: 3 additions & 2 deletions src/array/growable/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ pub struct GrowableUtf8<'a, O: Offset> {
values: MutableBuffer<u8>,
offsets: MutableBuffer<O>,
length: O, // always equal to the last offset at `offsets`.
// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableUtf8<'a, O> {
/// Creates a new [`GrowableUtf8`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a Utf8Array<O>>, mut use_validity: bool, capacity: usize) -> Self {
// if any of the arrays has nulls, insertions from any array require setting bits
// as there is at least one array with nulls.
Expand Down
21 changes: 3 additions & 18 deletions src/array/growable/utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
array::{Array, Offset},
bitmap::{Bitmap, MutableBitmap},
bitmap::MutableBitmap,
buffer::MutableBuffer,
};

Expand All @@ -18,6 +18,8 @@ pub(super) fn extend_offsets<T: Offset>(
});
}

// function used to extend nulls from arrays. This function's lifetime is bound to the array
// because it reads nulls from it.
pub(super) type ExtendNullBits<'a> = Box<dyn Fn(&mut MutableBitmap, usize, usize) + 'a>;

pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> ExtendNullBits {
Expand All @@ -36,23 +38,6 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E
}
}

#[inline]
pub(super) fn extend_validity(
mutable_validity: &mut MutableBitmap,
validity: &Option<Bitmap>,
start: usize,
len: usize,
use_validity: bool,
) {
if let Some(bitmap) = validity {
assert!(start + len <= bitmap.len());
let (slice, offset, _) = bitmap.as_slice();
mutable_validity.extend_from_slice(slice, start + offset, len);
} else if use_validity {
mutable_validity.extend_constant(len, true);
};
}

#[inline]
pub(super) fn extend_offset_values<O: Offset>(
buffer: &mut MutableBuffer<u8>,
Expand Down

0 comments on commit cdbc958

Please sign in to comment.