Skip to content

Commit

Permalink
fix reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ariesdevil committed Mar 12, 2024
1 parent a6410ce commit 1fd109b
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 124 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,29 @@
// under the License.

use crate::array::print_long_array;
use crate::builder::GenericBytesViewBuilder;
use crate::builder::GenericByteViewBuilder;
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::BytesViewType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder, BytesView};
use arrow_data::{ArrayData, ArrayDataBuilder, ByteView};
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::Arc;

/// An array of variable length bytes view arrays
pub struct GenericBytesViewArray<T: BytesViewType + ?Sized> {
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
data_type: DataType,
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
phantom: PhantomData<T>,
nulls: Option<NullBuffer>,
}

impl<T: BytesViewType + ?Sized> Clone for GenericBytesViewArray<T> {
impl<T: ByteViewType + ?Sized> Clone for GenericByteViewArray<T> {
fn clone(&self) -> Self {
Self {
data_type: T::DATA_TYPE,
Expand All @@ -50,22 +50,22 @@ impl<T: BytesViewType + ?Sized> Clone for GenericBytesViewArray<T> {
}
}

impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
/// Create a new [`GenericBytesViewArray`] from the provided parts, panicking on failure
impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Create a new [`GenericByteViewArray`] from the provided parts, panicking on failure
///
/// # Panics
///
/// Panics if [`GenericBytesViewArray::try_new`] returns an error
/// Panics if [`GenericByteViewArray::try_new`] returns an error
pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
Self::try_new(views, buffers, nulls).unwrap()
}

/// Create a new [`GenericBytesViewArray`] from the provided parts, returning an error on failure
/// Create a new [`GenericByteViewArray`] from the provided parts, returning an error on failure
///
/// # Errors
///
/// * `views.len() != nulls.len()`
/// * [BytesViewType::validate] fails
/// * [ByteViewType::validate] fails
pub fn try_new(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
Expand Down Expand Up @@ -93,7 +93,7 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
})
}

/// Create a new [`GenericBytesViewArray`] from the provided parts, without validation
/// Create a new [`GenericByteViewArray`] from the provided parts, without validation
///
/// # Safety
///
Expand All @@ -112,7 +112,7 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
}
}

/// Create a new [`GenericBytesViewArray`] of length `len` where all values are null
/// Create a new [`GenericByteViewArray`] of length `len` where all values are null
pub fn new_null(len: usize) -> Self {
Self {
data_type: T::DATA_TYPE,
Expand All @@ -123,14 +123,14 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
}
}

/// Creates a [`GenericBytesViewArray`] based on an iterator of values without nulls
/// Creates a [`GenericByteViewArray`] based on an iterator of values without nulls
pub fn from_iter_values<Ptr, I>(iter: I) -> Self
where
Ptr: AsRef<T::Native>,
I: IntoIterator<Item = Ptr>,
{
let iter = iter.into_iter();
let mut builder = GenericBytesViewBuilder::<T>::with_capacity(iter.size_hint().0);
let mut builder = GenericByteViewBuilder::<T>::with_capacity(iter.size_hint().0);
for v in iter {
builder.append_value(v);
}
Expand Down Expand Up @@ -179,7 +179,7 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
let ptr = self.views.as_ptr() as *const u8;
std::slice::from_raw_parts(ptr.add(idx * 16 + 4), len as usize)
} else {
let view = BytesView::from(*v);
let view = ByteView::from(*v);
let data = self.buffers.get_unchecked(view.buffer_index as usize);
let offset = view.offset as usize;
data.get_unchecked(offset..offset + len as usize)
Expand All @@ -204,7 +204,7 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewArray<T> {
}
}

impl<T: BytesViewType + ?Sized> Debug for GenericBytesViewArray<T> {
impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}ViewArray\n[\n", T::PREFIX)?;
print_long_array(self, f, |array, index, f| {
Expand All @@ -214,7 +214,7 @@ impl<T: BytesViewType + ?Sized> Debug for GenericBytesViewArray<T> {
}
}

impl<T: BytesViewType + ?Sized> Array for GenericBytesViewArray<T> {
impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
fn as_any(&self) -> &dyn Any {
self
}
Expand Down Expand Up @@ -265,19 +265,19 @@ impl<T: BytesViewType + ?Sized> Array for GenericBytesViewArray<T> {
}
}

impl<'a, T: BytesViewType + ?Sized> ArrayAccessor for &'a GenericBytesViewArray<T> {
impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray<T> {
type Item = &'a T::Native;

fn value(&self, index: usize) -> Self::Item {
GenericBytesViewArray::value(self, index)
GenericByteViewArray::value(self, index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
GenericBytesViewArray::value_unchecked(self, index)
GenericByteViewArray::value_unchecked(self, index)
}
}

impl<'a, T: BytesViewType + ?Sized> IntoIterator for &'a GenericBytesViewArray<T> {
impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T> {
type Item = Option<&'a T::Native>;
type IntoIter = ArrayIter<Self>;

Expand All @@ -286,7 +286,7 @@ impl<'a, T: BytesViewType + ?Sized> IntoIterator for &'a GenericBytesViewArray<T
}
}

impl<T: BytesViewType + ?Sized> From<ArrayData> for GenericBytesViewArray<T> {
impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
fn from(value: ArrayData) -> Self {
let views = value.buffers()[0].clone();
let views = ScalarBuffer::new(views, value.offset(), value.len());
Expand All @@ -301,8 +301,8 @@ impl<T: BytesViewType + ?Sized> From<ArrayData> for GenericBytesViewArray<T> {
}
}

impl<T: BytesViewType + ?Sized> From<GenericBytesViewArray<T>> for ArrayData {
fn from(mut array: GenericBytesViewArray<T>) -> Self {
impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
fn from(mut array: GenericByteViewArray<T>) -> Self {
let len = array.len();
array.buffers.insert(0, array.views.into_inner());
let builder = ArrayDataBuilder::new(T::DATA_TYPE)
Expand All @@ -314,30 +314,30 @@ impl<T: BytesViewType + ?Sized> From<GenericBytesViewArray<T>> for ArrayData {
}
}

impl<Ptr, T: BytesViewType + ?Sized> FromIterator<Option<Ptr>> for GenericBytesViewArray<T>
impl<Ptr, T: ByteViewType + ?Sized> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
where
Ptr: AsRef<T::Native>,
{
fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
let iter = iter.into_iter();
let mut builder = GenericBytesViewBuilder::<T>::with_capacity(iter.size_hint().0);
let mut builder = GenericByteViewBuilder::<T>::with_capacity(iter.size_hint().0);
builder.extend(iter);
builder.finish()
}
}

/// A [`GenericBytesViewArray`] of `[u8]`
pub type BinaryViewArray = GenericBytesViewArray<[u8]>;
/// A [`GenericByteViewArray`] of `[u8]`
pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;

/// A [`GenericBytesViewArray`] of `str`
/// A [`GenericByteViewArray`] of `str`
///
/// ```
/// use arrow_array::StringViewArray;
/// let array = StringViewArray::from_iter_values(vec!["hello", "world", "lulu", "large payload over 12 bytes"]);
/// assert_eq!(array.value(0), "hello");
/// assert_eq!(array.value(3), "large payload over 12 bytes");
/// ```
pub type StringViewArray = GenericBytesViewArray<str>;
pub type StringViewArray = GenericByteViewArray<StringViewType>;

impl From<Vec<&str>> for StringViewArray {
fn from(v: Vec<&str>) -> Self {
Expand All @@ -348,7 +348,6 @@ impl From<Vec<&str>> for StringViewArray {
#[cfg(test)]
mod tests {
use crate::builder::StringViewBuilder;
use crate::types::BytesViewType;
use crate::{Array, BinaryViewArray, StringViewArray};

#[test]
Expand All @@ -363,10 +362,10 @@ mod tests {
assert_eq!(array.value(3), "large payload over 12 bytes");

let array = BinaryViewArray::from_iter_values(vec![
b"hello".to_bytes(),
b"world".to_bytes(),
b"lulu".to_bytes(),
b"large payload over 12 bytes".to_bytes(),
b"hello".as_slice(),
b"world".as_slice(),
b"lulu".as_slice(),
b"large payload over 12 bytes".as_slice(),
]);
assert_eq!(array.value(0), b"hello");
assert_eq!(array.value(3), b"large payload over 12 bytes");
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ mod run_array;

pub use run_array::*;

mod bytes_view_array;
mod byte_view_array;

pub use bytes_view_array::*;
pub use byte_view_array::*;

/// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html)
pub trait Array: std::fmt::Debug + Send + Sync {
Expand Down
40 changes: 20 additions & 20 deletions arrow-array/src/builder/generic_bytes_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
// under the License.

use crate::builder::ArrayBuilder;
use crate::types::BytesViewType;
use crate::{ArrayRef, GenericBytesViewArray};
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{ArrayRef, GenericByteViewArray};
use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer};
use arrow_data::BytesView;
use arrow_data::ByteView;
use std::any::Any;
use std::marker::PhantomData;
use std::sync::Arc;

const DEFAULT_BLOCK_SIZE: u32 = 8 * 1024;

/// A builder for [`GenericBytesViewArray`]
/// A builder for [`GenericByteViewArray`]
///
/// See [`Self::append_value`] for the allocation strategy
pub struct GenericBytesViewBuilder<T: BytesViewType + ?Sized> {
pub struct GenericByteViewBuilder<T: ByteViewType + ?Sized> {
views_builder: BufferBuilder<u128>,
null_buffer_builder: NullBufferBuilder,
completed: Vec<Buffer>,
Expand All @@ -38,7 +38,7 @@ pub struct GenericBytesViewBuilder<T: BytesViewType + ?Sized> {
phantom: PhantomData<T>,
}

impl<T: BytesViewType + ?Sized> GenericBytesViewBuilder<T> {
impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
/// Creates a new [`GenericByteViewBuilder`].
pub fn new() -> Self {
Self::with_capacity(1024)
Expand Down Expand Up @@ -93,7 +93,7 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewBuilder<T> {
let offset = self.in_progress.len() as u32;
self.in_progress.extend_from_slice(v);

let view = BytesView {
let view = ByteView {
length,
prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
buffer_index: self.completed.len() as u32,
Expand All @@ -119,8 +119,8 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewBuilder<T> {
self.views_builder.append(0);
}

/// Builds the [`GenericBytesViewArray`] and reset this builder
pub fn finish(&mut self) -> GenericBytesViewArray<T> {
/// Builds the [`GenericByteViewArray`] and reset this builder
pub fn finish(&mut self) -> GenericByteViewArray<T> {
let mut completed = std::mem::take(&mut self.completed);
if !self.in_progress.is_empty() {
completed.push(std::mem::take(&mut self.in_progress).into());
Expand All @@ -129,11 +129,11 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewBuilder<T> {
let views = ScalarBuffer::new(self.views_builder.finish(), 0, len);
let nulls = self.null_buffer_builder.finish();
// SAFETY: valid by construction
unsafe { GenericBytesViewArray::new_unchecked(views, completed, nulls) }
unsafe { GenericByteViewArray::new_unchecked(views, completed, nulls) }
}

/// Builds the [`GenericBytesViewArray`] without resetting the builder
pub fn finish_cloned(&self) -> GenericBytesViewArray<T> {
/// Builds the [`GenericByteViewArray`] without resetting the builder
pub fn finish_cloned(&self) -> GenericByteViewArray<T> {
let mut completed = self.completed.clone();
if !self.in_progress.is_empty() {
completed.push(Buffer::from_slice_ref(&self.in_progress));
Expand All @@ -143,17 +143,17 @@ impl<T: BytesViewType + ?Sized> GenericBytesViewBuilder<T> {
let views = ScalarBuffer::new(views, 0, len);
let nulls = self.null_buffer_builder.finish_cloned();
// SAFETY: valid by construction
unsafe { GenericBytesViewArray::new_unchecked(views, completed, nulls) }
unsafe { GenericByteViewArray::new_unchecked(views, completed, nulls) }
}
}

impl<T: BytesViewType + ?Sized> Default for GenericBytesViewBuilder<T> {
impl<T: ByteViewType + ?Sized> Default for GenericByteViewBuilder<T> {
fn default() -> Self {
Self::new()
}
}

impl<T: BytesViewType + ?Sized> std::fmt::Debug for GenericBytesViewBuilder<T> {
impl<T: ByteViewType + ?Sized> std::fmt::Debug for GenericByteViewBuilder<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}ViewBuilder", T::PREFIX)?;
f.debug_struct("")
Expand All @@ -165,7 +165,7 @@ impl<T: BytesViewType + ?Sized> std::fmt::Debug for GenericBytesViewBuilder<T> {
}
}

impl<T: BytesViewType + ?Sized> ArrayBuilder for GenericBytesViewBuilder<T> {
impl<T: ByteViewType + ?Sized> ArrayBuilder for GenericByteViewBuilder<T> {
fn len(&self) -> usize {
self.null_buffer_builder.len()
}
Expand All @@ -191,8 +191,8 @@ impl<T: BytesViewType + ?Sized> ArrayBuilder for GenericBytesViewBuilder<T> {
}
}

impl<T: BytesViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
for GenericBytesViewBuilder<T>
impl<T: ByteViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
for GenericByteViewBuilder<T>
{
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
Expand All @@ -206,10 +206,10 @@ impl<T: BytesViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
///
/// Values can be appended using [`GenericByteViewBuilder::append_value`], and nulls with
/// [`GenericByteViewBuilder::append_null`] as normal.
pub type StringViewBuilder = GenericBytesViewBuilder<str>;
pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;

/// Array builder for [`BinaryViewArray`][crate::BinaryViewArray]
///
/// Values can be appended using [`GenericByteViewBuilder::append_value`], and nulls with
/// [`GenericByteViewBuilder::append_null`] as normal.
pub type BinaryViewBuilder = GenericBytesViewBuilder<[u8]>;
pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
Loading

0 comments on commit 1fd109b

Please sign in to comment.