Skip to content

Commit

Permalink
Merge branch 'master' into issue1108_add_comparison_for_BinaryArray
Browse files Browse the repository at this point in the history
  • Loading branch information
HaoYang670 committed Jan 19, 2022
2 parents 05619e3 + 9d637a4 commit ff3ab90
Show file tree
Hide file tree
Showing 20 changed files with 1,978 additions and 218 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/miri.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [nightly-2021-10-23]
rust: [nightly-2022-01-17]
steps:
- uses: actions/checkout@v2
with:
Expand Down
60 changes: 42 additions & 18 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,10 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}

/// Creates a [GenericBinaryArray] from a vector of byte slices
///
/// This is a thin convenience wrapper: the vector is handed unchanged to
/// [`Self::from_iter_values`], which builds the offsets and values buffers.
///
/// See also [`Self::from_iter_values`]
pub fn from_vec(v: Vec<&[u8]>) -> Self {
    Self::from_iter_values(v)
}

/// Creates a [GenericBinaryArray] from a vector of Optional (null) byte slices
Expand Down Expand Up @@ -171,6 +159,42 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
let data = unsafe { builder.build_unchecked() };
Self::from(data)
}

/// Builds a `GenericBinaryArray` from an iterator whose items are all non-null values.
///
/// Each item is viewed as a byte slice through `AsRef<[u8]>`; the bytes are
/// appended to the values buffer and the running end position is appended to
/// the offsets buffer.
///
/// # Panics
///
/// Panics if the iterator's `size_hint` reports no upper bound, or if
/// `OffsetSize::from_usize` fails for the length of one of the values.
pub fn from_iter_values<Ptr, I>(iter: I) -> Self
where
    Ptr: AsRef<[u8]>,
    I: IntoIterator<Item = Ptr>,
{
    let iter = iter.into_iter();
    let offset_width = std::mem::size_of::<OffsetSize>();

    // The upper bound is only used to size the initial offsets allocation.
    let upper_bound = iter.size_hint().1.expect("Iterator must be sized"); // panic if no upper bound.

    let mut offsets = MutableBuffer::new((upper_bound + 1) * offset_width);
    let mut values = MutableBuffer::new(0);

    // The first offset is always zero; every later one is the end of a value.
    let mut end_of_value = OffsetSize::zero();
    offsets.push(end_of_value);

    iter.for_each(|item| {
        let bytes = item.as_ref();
        end_of_value += OffsetSize::from_usize(bytes.len()).unwrap();
        offsets.push(end_of_value);
        values.extend_from_slice(bytes);
    });

    // The size hint may have been wrong, so derive the element count from the
    // number of offsets actually written (one more than the number of values).
    assert!(!offsets.is_empty()); // wrote at least one
    let value_count = (offsets.len() / offset_width) - 1;

    let builder = ArrayData::builder(OffsetSize::DATA_TYPE)
        .len(value_count)
        .add_buffer(offsets.into())
        .add_buffer(values.into());
    // SAFETY: the offsets start at zero and each one grows by exactly the
    // number of bytes appended to `values`, and `value_count` is the number of
    // offsets minus one — presumably the invariants `build_unchecked` skips
    // validating (NOTE(review): confirm against `ArrayData` validation rules).
    let data = unsafe { builder.build_unchecked() };
    Self::from(data)
}
}

impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray<T> {
Expand Down Expand Up @@ -359,21 +383,21 @@ impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<Option<&[u8]>>>
for GenericBinaryArray<OffsetSize>
{
fn from(v: Vec<Option<&[u8]>>) -> Self {
GenericBinaryArray::<OffsetSize>::from_opt_vec(v)
Self::from_opt_vec(v)
}
}

/// Conversion from a vector of non-null byte slices.
impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<&[u8]>>
    for GenericBinaryArray<OffsetSize>
{
    /// Builds the array by forwarding the vector to `from_iter_values`.
    fn from(v: Vec<&[u8]>) -> Self {
        Self::from_iter_values(v)
    }
}

/// Conversion from a list array with the same offset type.
impl<T: BinaryOffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
    /// Builds the array by forwarding the list array to `from_list`.
    fn from(v: GenericListArray<T>) -> Self {
        Self::from_list(v)
    }
}

Expand Down
7 changes: 4 additions & 3 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,10 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
}

/// Creates a `GenericStringArray` based on an iterator of values without nulls
pub fn from_iter_values<Ptr, I: IntoIterator<Item = Ptr>>(iter: I) -> Self
pub fn from_iter_values<Ptr, I>(iter: I) -> Self
where
Ptr: AsRef<str>,
I: IntoIterator<Item = Ptr>,
{
let iter = iter.into_iter();
let (_, data_len) = iter.size_hint();
Expand Down Expand Up @@ -306,15 +307,15 @@ impl<OffsetSize: StringOffsetSizeTrait> From<Vec<&str>>
for GenericStringArray<OffsetSize>
{
fn from(v: Vec<&str>) -> Self {
GenericStringArray::<OffsetSize>::from_iter_values(v)
Self::from_iter_values(v)
}
}

/// Conversion from a vector of owned, non-null strings.
impl<OffsetSize: StringOffsetSizeTrait> From<Vec<String>>
    for GenericStringArray<OffsetSize>
{
    /// Builds the array by forwarding the vector to `from_iter_values`.
    fn from(v: Vec<String>) -> Self {
        Self::from_iter_values(v)
    }
}

Expand Down
32 changes: 32 additions & 0 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,15 @@ impl BooleanBufferBuilder {
}
}

/// Resizes the buffer, either truncating its contents (with no change in capacity), or
/// growing it (potentially reallocating it) and writing `false` in the newly available bits.
///
/// `len` is the new length in bits. When shrinking to a length that is not a
/// multiple of 8, the bits above `len` in the last retained byte are cleared,
/// so that a later growth really exposes `false` bits rather than stale ones.
#[inline]
pub fn resize(&mut self, len: usize) {
    let len_bytes = bit_util::ceil(len, 8);
    // Newly added whole bytes are zero-filled here.
    self.buffer.resize(len_bytes, 0);

    let bits_in_last_byte = len % 8;
    if len < self.len && bits_in_last_byte != 0 {
        // Shrinking inside a byte: keep only the low `bits_in_last_byte` bits.
        if let Some(last) = self.buffer.as_slice_mut().last_mut() {
            *last &= (1u8 << bits_in_last_byte) - 1;
        }
    }
    self.len = len;
}

#[inline]
pub fn append(&mut self, v: bool) {
self.advance(1);
Expand Down Expand Up @@ -2931,6 +2940,29 @@ mod tests {
assert_eq!(arr1, arr2);
}

/// Exercises `BooleanBufferBuilder::resize` when growing, when shrinking, and
/// when appending again after a shrink.
#[test]
fn test_boolean_array_builder_resize() {
    // Start with 13 bits: four set, seven cleared, two set.
    let mut builder = BooleanBufferBuilder::new(20);
    builder.append_n(4, true);
    builder.append_n(7, false);
    builder.append_n(2, true);

    // Growing to 20 bits adds a third, zero-filled byte.
    builder.resize(20);
    let after_grow: &[u8] = &[0b00001111, 0b00011000, 0b00000000];
    assert_eq!(builder.len, 20);
    assert_eq!(builder.buffer.as_slice(), after_grow);

    // Shrinking to 5 bits keeps only the first byte.
    builder.resize(5);
    let after_shrink: &[u8] = &[0b00001111];
    assert_eq!(builder.len, 5);
    assert_eq!(builder.buffer.as_slice(), after_shrink);

    // Appending four set bits continues from bit 5 and spills into a second byte.
    builder.append_n(4, true);
    let after_append: &[u8] = &[0b11101111, 0b00000001];
    assert_eq!(builder.len, 9);
    assert_eq!(builder.buffer.as_slice(), after_append);
}

#[test]
fn test_boolean_builder_increases_buffer_len() {
// 00000010 01001000
Expand Down
Loading

0 comments on commit ff3ab90

Please sign in to comment.