Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved usages of unsafe and added tests for its invariants. (#403)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 14, 2021
1 parent 06892e9 commit ee23796
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 22 deletions.
12 changes: 6 additions & 6 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ impl<O: Offset> BinaryArray<O> {
}
}

/// Sets the validity bitmap on this [`BinaryArray`].
/// Clones this [`BinaryArray`] with a different validity.
/// # Panic
/// This function panics iff `validity.len() != self.len()`.
/// Panics iff `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
panic!("validity should be as least as large as the array")
panic!("validity's length must be equal to the array's length")
}
let mut arr = self.clone();
arr.validity = validity;
Expand All @@ -112,7 +112,7 @@ impl<O: Offset> BinaryArray<O> {
impl<O: Offset> BinaryArray<O> {
/// Returns the element at index `i`
/// # Panics
/// iff `i > self.len()`
/// iff `i >= self.len()`
pub fn value(&self, i: usize) -> &[u8] {
let offsets = self.offsets.as_slice();
let offset = offsets[i];
Expand All @@ -127,8 +127,8 @@ impl<O: Offset> BinaryArray<O> {
/// # Safety
/// Assumes that the `i < self.len`.
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
let offset = *self.offsets.as_ptr().add(i);
let offset_1 = *self.offsets.as_ptr().add(i + 1);
let offset = *self.offsets.get_unchecked(i);
let offset_1 = *self.offsets.get_unchecked(i + 1);
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

Expand Down
15 changes: 7 additions & 8 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub use mutable::*;
/// Cloning and slicing this struct is `O(1)`.
#[derive(Debug, Clone)]
pub struct FixedSizeBinaryArray {
size: i32, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type.
size: usize, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type.
data_type: DataType,
values: Buffer<u8>,
validity: Option<Bitmap>,
Expand All @@ -34,9 +34,9 @@ impl FixedSizeBinaryArray {

/// Returns a new [`FixedSizeBinaryArray`].
pub fn from_data(data_type: DataType, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
let size = *Self::get_size(&data_type);
let size = *Self::get_size(&data_type) as usize;

assert_eq!(values.len() % (size as usize), 0);
assert_eq!(values.len() % size, 0);

Self {
size,
Expand Down Expand Up @@ -83,15 +83,14 @@ impl FixedSizeBinaryArray {
/// Assumes that the `i < self.len`.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
std::slice::from_raw_parts(
self.values.as_ptr().add(i * self.size as usize),
self.size as usize,
)
// soundness: invariant of the function.
self.values
.get_unchecked(i * self.size..(i + 1) * self.size)
}

/// Returns the size
pub fn size(&self) -> usize {
self.size as usize
self.size
}

/// Sets the validity bitmap on this [`FixedSizeBinaryArray`].
Expand Down
15 changes: 8 additions & 7 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ impl<O: Offset> Utf8Array<O> {
}
}

/// The same as [`Utf8Array::from_data`] but does not check for utf8.
/// The same as [`Utf8Array::from_data`] but does not check for valid utf8.
/// # Safety
/// `values` buffer must contain valid utf8 between every `offset`
pub unsafe fn from_data_unchecked(
Expand Down Expand Up @@ -122,13 +122,14 @@ impl<O: Offset> Utf8Array<O> {
/// # Safety
/// This function is safe `iff` `i < self.len`.
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
let offset = *self.offsets.as_ptr().add(i);
let offset_1 = *self.offsets.as_ptr().add(i + 1);
// soundness: the invariant of the function
let offset = *self.offsets.get_unchecked(i);
let offset_1 = *self.offsets.get_unchecked(i + 1);
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

// Soundness: `from_data` verifies that each slot is utf8 and offsets are built correctly.
let slice = std::slice::from_raw_parts(self.values.as_ptr().add(offset), length);
let slice = &self.values.as_slice()[offset..offset + length];
// soundness: the invariant of the struct
std::str::from_utf8_unchecked(slice)
}

Expand Down Expand Up @@ -171,8 +172,8 @@ impl<O: Offset> Utf8Array<O> {
let offset = offset.to_usize();

let slice = &self.values.as_slice()[offset..offset + length];
// todo: validate utf8 so that we can use the unsafe version
std::str::from_utf8(slice).unwrap()
// soundness: we always check for utf8 soundness on constructors.
unsafe { std::str::from_utf8_unchecked(slice) }
}

/// Returns the offsets of this [`Utf8Array`].
Expand Down
64 changes: 64 additions & 0 deletions tests/it/array/binary/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow2::{
array::{Array, BinaryArray},
bitmap::Bitmap,
buffer::Buffer,
datatypes::DataType,
};

Expand Down Expand Up @@ -71,3 +72,66 @@ fn from_iter() {
let a: BinaryArray<i32> = iter.collect();
assert_eq!(a.len(), 2);
}

#[test]
fn with_validity() {
let array = BinaryArray::<i32>::from(&[Some(b"hello".as_ref()), Some(b" ".as_ref()), None]);

let array = array.with_validity(None);

let a = array.validity();
assert_eq!(a, &None);
}

#[test]
#[should_panic]
fn wrong_offsets() {
let offsets = Buffer::from(&[0, 5, 4]); // invalid offsets
let values = Buffer::from(b"abbbbb");
BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
}

#[test]
#[should_panic]
fn wrong_data_type() {
let offsets = Buffer::from(&[0, 4]);
let values = Buffer::from(b"abbb");
BinaryArray::<i32>::from_data(DataType::Int8, offsets, values, None);
}

#[test]
#[should_panic]
fn value_with_wrong_offsets_panics() {
let offsets = Buffer::from(&[0, 10, 11, 4]);
let values = Buffer::from(b"abbb");
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

// but access is still checked (and panics)
// without checks, this would result in reading beyond bounds
array.value(0);
}

#[test]
#[should_panic]
fn index_out_of_bounds_panics() {
let offsets = Buffer::from(&[0, 1, 2, 4]);
let values = Buffer::from(b"abbb");
let array = BinaryArray::<i32>::from_data(DataType::Utf8, offsets, values, None);

array.value(3);
}

#[test]
#[should_panic]
fn value_unchecked_with_wrong_offsets_panics() {
let offsets = Buffer::from(&[0, 10, 11, 4]);
let values = Buffer::from(b"abbb");
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

// but access is still checked (and panics)
// without checks, this would result in reading beyond bounds,
// even if `0` is in bounds
unsafe { array.value_unchecked(0) };
}
39 changes: 38 additions & 1 deletion tests/it/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,44 @@ fn wrong_offsets() {
#[test]
#[should_panic]
fn wrong_data_type() {
let offsets = Buffer::from(&[0, 4]); // invalid offsets
let offsets = Buffer::from(&[0, 4]);
let values = Buffer::from(b"abbb");
Utf8Array::<i32>::from_data(DataType::Int8, offsets, values, None);
}

#[test]
#[should_panic]
fn value_with_wrong_offsets_panics() {
let offsets = Buffer::from(&[0, 10, 11, 4]);
let values = Buffer::from(b"abbb");
// the 10-11 is not checked
let array = Utf8Array::<i32>::from_data(DataType::Utf8, offsets, values, None);

// but access is still checked (and panics)
// without checks, this would result in reading beyond bounds
array.value(0);
}

#[test]
#[should_panic]
fn index_out_of_bounds_panics() {
let offsets = Buffer::from(&[0, 1, 2, 4]);
let values = Buffer::from(b"abbb");
let array = Utf8Array::<i32>::from_data(DataType::Utf8, offsets, values, None);

array.value(3);
}

#[test]
#[should_panic]
fn value_unchecked_with_wrong_offsets_panics() {
let offsets = Buffer::from(&[0, 10, 11, 4]);
let values = Buffer::from(b"abbb");
// the 10-11 is not checked
let array = Utf8Array::<i32>::from_data(DataType::Utf8, offsets, values, None);

// but access is still checked (and panics)
// without checks, this would result in reading beyond bounds,
// even if index `0` is in bounds
unsafe { array.value_unchecked(0) };
}

0 comments on commit ee23796

Please sign in to comment.