Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Revert "Fixed ffi of sliced arrays (#540)"
Browse files Browse the repository at this point in the history
This reverts commit e6b6c83.
  • Loading branch information
jorgecarleitao committed Oct 26, 2021
1 parent 4491954 commit 24851fa
Show file tree
Hide file tree
Showing 33 changed files with 254 additions and 270 deletions.
81 changes: 8 additions & 73 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,52 +39,15 @@ def tearDown(self):
# No leak of C++ memory
self.assertEqual(self.old_allocated_cpp, pyarrow.total_allocated_bytes())

def test_primitive(self):
a = pyarrow.array([0, None, 2, 3, 4])
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_primitive_sliced(self):
a = pyarrow.array([0, None, 2, 3, 4]).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_boolean(self):
a = pyarrow.array([True, None, False, True, False])
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_boolean_sliced(self):
a = pyarrow.array([True, None, False, True, False]).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_string(self):
def test_string_roundtrip(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.array(["a", None, "ccc"])
b = arrow_pyarrow_integration_testing.round_trip_array(a)
c = pyarrow.array(["a", None, "ccc"])
self.assertEqual(b, c)

def test_string_sliced(self):
a = pyarrow.array(["a", None, "ccc"]).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_decimal_roundtrip(self):
"""
Python -> Rust -> Python
Expand Down Expand Up @@ -112,17 +75,10 @@ def test_list_array(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_list_sliced(self):
a = pyarrow.array(
[[], None, [1, 2], [4, 5, 6]], pyarrow.list_(pyarrow.int64())
).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_struct(self):
def test_struct_array(self):
"""
Python -> Rust -> Python
"""
fields = [
("f1", pyarrow.int32()),
("f2", pyarrow.string()),
Expand All @@ -143,27 +99,6 @@ def test_struct(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

# see https://issues.apache.org/jira/browse/ARROW-14383
def _test_struct_sliced(self):
fields = [
("f1", pyarrow.int32()),
("f2", pyarrow.string()),
]
a = pyarrow.array(
[
{"f1": 1, "f2": "a"},
None,
{"f1": 3, "f2": None},
{"f1": None, "f2": "d"},
{"f1": None, "f2": None},
],
pyarrow.struct(fields),
).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)
b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_list_list_array(self):
"""
Python -> Rust -> Python
Expand Down
11 changes: 11 additions & 0 deletions src/array/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ This document describes the overall design of this module.
* `from_trusted_len_values_iter` from an iterator of trusted len of values
* `try_from_trusted_len_iter` from an fallible iterator of trusted len of optional values

### Slot offsets

* An array MUST have a `offset: usize` measuring the number of slots that the array is currently offsetted by if the specification requires.

* An array MUST implement `fn slice(&self, offset: usize, length: usize) -> Self` that returns an offseted and/or truncated clone of the array. This function MUST increase the array's offset if it exists.

* Conversely, `offset` MUST only be changed by `slice`.

The rational of the above is that it enable us to be fully interoperable with the offset logic supported by the C data interface, while at the same time easily perform array slices
within Rust's type safety mechanism.

### Mutable Arrays

* An array MAY have a mutable counterpart. E.g. `MutablePrimitiveArray<T>` is the mutable counterpart of `PrimitiveArray<T>`.
Expand Down
28 changes: 15 additions & 13 deletions src/array/binary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,17 @@ use super::BinaryArray;

unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
let offset = self
.validity
.as_ref()
.map(|x| x.offset())
.unwrap_or_default();

let offsets = std::ptr::NonNull::new(unsafe {
self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8
});

vec![
self.validity.as_ref().map(|x| x.as_ptr()),
offsets,
std::ptr::NonNull::new(self.offsets.as_ptr() as *mut u8),
std::ptr::NonNull::new(self.values.as_ptr() as *mut u8),
]
}

#[inline]
fn offset(&self) -> usize {
self.offset
}
}

impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
Expand All @@ -38,10 +33,17 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
};
assert_eq!(data_type, expected);

let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let length = array.array().len();
let offset = array.array().offset();
let mut validity = unsafe { array.validity() }?;
let mut offsets = unsafe { array.buffer::<O>(0) }?;
let values = unsafe { array.buffer::<u8>(1) }?;

if offset > 0 {
offsets = offsets.slice(offset, length);
validity = validity.map(|x| x.slice(offset, length))
}

Ok(Self::from_data_unchecked(
Self::default_data_type(),
offsets,
Expand Down
4 changes: 4 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub struct BinaryArray<O: Offset> {
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
offset: usize,
}

// constructors
Expand Down Expand Up @@ -70,6 +71,7 @@ impl<O: Offset> BinaryArray<O> {
offsets,
values,
validity,
offset: 0,
}
}

Expand Down Expand Up @@ -111,6 +113,7 @@ impl<O: Offset> BinaryArray<O> {
offsets,
values,
validity,
offset: 0,
}
}

Expand Down Expand Up @@ -143,6 +146,7 @@ impl<O: Offset> BinaryArray<O> {
offsets,
values: self.values.clone(),
validity,
offset: self.offset + offset,
}
}

Expand Down
22 changes: 12 additions & 10 deletions src/array/boolean/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,30 @@ use super::BooleanArray;

unsafe impl ToFfi for BooleanArray {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
let offset = self
.validity
.as_ref()
.map(|x| x.offset())
.unwrap_or_default();

assert!(self.values.offset() >= offset);
vec![
self.validity.as_ref().map(|x| x.as_ptr()),
Some(self.values.as_ptr()),
]
}

fn offset(&self) -> usize {
self.offset
}
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for BooleanArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
assert_eq!(data_type, DataType::Boolean);
let length = array.array().len();
let offset = array.array().offset();
let mut validity = unsafe { array.validity() }?;
let mut values = unsafe { array.bitmap(0) }?;

let validity = unsafe { array.validity() }?;
let values = unsafe { array.bitmap(0) }?;

if offset > 0 {
values = values.slice(offset, length);
validity = validity.map(|x| x.slice(offset, length))
}
Ok(Self::from_data(data_type, values, validity))
}
}
9 changes: 3 additions & 6 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct BooleanArray {
data_type: DataType,
values: Bitmap,
validity: Option<Bitmap>,
offset: usize,
}

impl BooleanArray {
Expand All @@ -45,13 +46,11 @@ impl BooleanArray {
if data_type.to_physical_type() != PhysicalType::Boolean {
panic!("BooleanArray can only be initialized with DataType::Boolean")
}
if matches!(&validity, Some(bitmap) if bitmap.offset() != values.offset()) {
panic!("validity's bitmap offset must equal values offset")
}
Self {
data_type,
values,
validity,
offset: 0,
}
}

Expand Down Expand Up @@ -84,6 +83,7 @@ impl BooleanArray {
data_type: self.data_type.clone(),
values: self.values.clone().slice_unchecked(offset, length),
validity,
offset: self.offset + offset,
}
}

Expand All @@ -94,9 +94,6 @@ impl BooleanArray {
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
panic!("validity should be as least as large as the array")
}
if matches!(&validity, Some(bitmap) if bitmap.offset() != self.values.offset()) {
panic!("validity's bitmap offset must equal values offset")
}
let mut arr = self.clone();
arr.validity = validity;
arr
Expand Down
15 changes: 13 additions & 2 deletions src/array/dictionary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,25 @@ unsafe impl<K: DictionaryKey> ToFfi for DictionaryArray<K> {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
self.keys.buffers()
}

#[inline]
fn offset(&self) -> usize {
self.offset
}
}

impl<K: DictionaryKey, A: ffi::ArrowArrayRef> FromFfi<A> for DictionaryArray<K> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
// keys: similar to PrimitiveArray, but the datatype is the inner one
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<K>(0) }?;
let length = array.array().len();
let offset = array.array().offset();
let mut validity = unsafe { array.validity() }?;
let mut values = unsafe { array.buffer::<K>(0) }?;

if offset > 0 {
values = values.slice(offset, length);
validity = validity.map(|x| x.slice(offset, length))
}
let keys = PrimitiveArray::<K>::from_data(K::DATA_TYPE, values, validity);
// values
let values = array.dictionary()?.unwrap();
Expand Down
4 changes: 4 additions & 0 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub struct DictionaryArray<K: DictionaryKey> {
data_type: DataType,
keys: PrimitiveArray<K>,
values: Arc<dyn Array>,
offset: usize,
}

impl<K: DictionaryKey> DictionaryArray<K> {
Expand Down Expand Up @@ -68,6 +69,7 @@ impl<K: DictionaryKey> DictionaryArray<K> {
data_type,
keys,
values,
offset: 0,
}
}

Expand All @@ -79,6 +81,7 @@ impl<K: DictionaryKey> DictionaryArray<K> {
data_type: self.data_type.clone(),
keys: self.keys.clone().slice(offset, length),
values: self.values.clone(),
offset: self.offset + offset,
}
}

Expand All @@ -90,6 +93,7 @@ impl<K: DictionaryKey> DictionaryArray<K> {
data_type: self.data_type.clone(),
keys: self.keys.clone().slice_unchecked(offset, length),
values: self.values.clone(),
offset: self.offset + offset,
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ pub unsafe trait ToFfi {
/// The pointers to the buffers.
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>>;

/// The offset
fn offset(&self) -> usize;

/// The children
fn children(&self) -> Vec<Arc<dyn Array>> {
vec![]
Expand Down
23 changes: 0 additions & 23 deletions src/array/fixed_size_binary/ffi.rs

This file was deleted.

Loading

0 comments on commit 24851fa

Please sign in to comment.