Skip to content

Commit

Permalink
growable array [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 28, 2023
1 parent 750a094 commit 4285c8e
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 31 deletions.
18 changes: 15 additions & 3 deletions crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod view;

use std::any::Any;
use std::marker::PhantomData;
use std::sync::Arc;

pub use mutable::*;
use polars_error::*;
Expand Down Expand Up @@ -74,10 +75,9 @@ impl ViewType for [u8] {
pub struct BinaryViewArrayGeneric<T: ViewType + ?Sized> {
data_type: ArrowDataType,
views: Buffer<u128>,
// Maybe Arc<[Buffer<u8>]>?
buffers: Vec<Buffer<u8>>,
buffers: Arc<[Buffer<u8>]>,
// Raw buffer access. (pointer, len).
raw_buffers: Vec<(*const u8, usize)>,
raw_buffers: Arc<[(*const u8, usize)]>,
validity: Option<Bitmap>,
phantom: PhantomData<T>,
}
Expand Down Expand Up @@ -127,6 +127,14 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
}
}

/// Returns the data buffers held by this array as a slice.
pub fn buffers(&self) -> &[Buffer<u8>] {
    // Deref coercion turns the reference to the owning container into a plain slice.
    &self.buffers
}

pub fn views(&self) -> &Buffer<u128> {
&self.views
}

pub fn try_new(
data_type: ArrowDataType,
views: Buffer<u128>,
Expand Down Expand Up @@ -222,6 +230,10 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
/// Creates a [`NonNullValuesIter`] for this array, handing it the array's validity.
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryViewArrayGeneric<T>> {
    let validity = self.validity();
    NonNullValuesIter::new(self, validity)
}

impl_sliced!();
impl_mut_validity!();
impl_into_array!();
}

impl<T: ViewType + ?Sized> Array for BinaryViewArrayGeneric<T> {
Expand Down
62 changes: 34 additions & 28 deletions crates/polars-arrow/src/array/growable/binview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@ use super::Growable;
use crate::array::binview::{BinaryViewArrayGeneric, MutableBinaryViewArray, ViewType};
use crate::array::{Array, BinaryArray};
use crate::bitmap::MutableBitmap;
use crate::buffer::Buffer;
use crate::datatypes::ArrowDataType;
use crate::offset::{Offset, Offsets};

/// Concrete [`Growable`] for the [`BinaryViewArrayGeneric`].
pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
data_type: ArrowDataType,
mutable: MutableBinaryViewArray<T>,
validity: MutableBitmap::with_capacity(capacity),
views: Vec<u128>,
buffers: Vec<Buffer<u8>>,
extend_null_bits: Vec<ExtendNullBits<'a>>,
}

impl<'a, O: Offset> GrowableBinary<'a, O> {
/// Creates a new [`GrowableBinary`] bound to `arrays` with a pre-allocated `capacity`.
impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
/// Creates a new [`GrowableBinaryViewArray`] bound to `arrays` with a pre-allocated `capacity`.
/// # Panics
/// If `arrays` is empty.
pub fn new(arrays: Vec<&'a BinaryArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
pub fn new(arrays: Vec<&'a BinaryViewArrayGeneric<T>>, mut use_validity: bool, capacity: usize) -> Self {
let data_type = arrays[0].data_type().clone();

// if any of the arrays has nulls, insertions from any array requires setting bits
Expand All @@ -34,50 +37,53 @@ impl<'a, O: Offset> GrowableBinary<'a, O> {
.map(|array| build_extend_null_bits(*array, use_validity))
.collect();

let n_buffers = arrays.iter().map(|binview| binview.buffers().len()).sum::<usize>();

Self {
arrays,
data_type,
values: Vec::with_capacity(0),
offsets: Offsets::with_capacity(capacity),
validity: MutableBitmap::with_capacity(capacity),
views: Vec::with_capacity(capacity),
buffers: Vec::with_capacity(n_buffers),
extend_null_bits,
}
}

fn to(&mut self) -> BinaryArray<O> {
let data_type = self.data_type.clone();
let validity = std::mem::take(&mut self.validity);
let offsets = std::mem::take(&mut self.offsets);
let values = std::mem::take(&mut self.values);

BinaryArray::<O>::new(data_type, offsets.into(), values.into(), validity.into())
fn to(&mut self) -> BinaryViewArrayGeneric<T> {
// let mutable = std::mem::take(&mut self.mutable);
// let out = mutable.into();
// debug_assert!(out.data_type() == &self.data_type);
// out
todo!()
}
}

impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> {
impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> {
fn extend(&mut self, index: usize, start: usize, len: usize) {
(self.extend_null_bits[index])(&mut self.validity, start, len);

let array = self.arrays[index];
let offsets = array.offsets();
let values = array.values();

self.offsets
.try_extend_from_slice(offsets, start, len)
.unwrap();
let buffer_offset: u32 = self.buffers.len().try_into().expect("unsupported");
let buffer_offset = (buffer_offset as u128) << 64;

// values
extend_offset_values::<O>(&mut self.values, offsets.buffer(), values, start, len);
let range = start..start + len;
self.buffers.extend_from_slice(&array.buffers()[range]);
self.views.extend(array.views()[range.clone()].iter().map(|&view| {
// If null the buffer index is ignored because the length is 0,
// so we can just do this
view + buffer_offset
}));
}

fn extend_validity(&mut self, additional: usize) {
self.offsets.extend_constant(additional);
self.views.extend(std::iter::repeat(0).take(additional));
self.validity.extend_constant(additional, false);
}

#[inline]
fn len(&self) -> usize {
self.offsets.len() - 1
self.views.len()
}

fn as_arc(&mut self) -> Arc<dyn Array> {
Expand All @@ -89,12 +95,12 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> {
}
}

impl<'a, O: Offset> From<GrowableBinary<'a, O>> for BinaryArray<O> {
fn from(val: GrowableBinary<'a, O>) -> Self {
BinaryArray::<O>::new(
impl<'a, T: ViewType + ?Sized> From<GrowableBinaryViewArray<'a, T>> for BinaryViewArrayGeneric<T> {
fn from(val: GrowableBinaryViewArray<'a, T>) -> Self {
BinaryViewArrayGeneric::<T>::new_unchecked(
val.data_type,
val.offsets.into(),
val.values.into(),
val.views.into(),
val.buffers
val.validity.into(),
)
}
Expand Down

0 comments on commit 4285c8e

Please sign in to comment.