From f7b343e787aeb747c96c5c1aaf141260636c3337 Mon Sep 17 00:00:00 2001 From: NilsBarlaug Date: Sat, 9 Oct 2021 05:29:32 +0200 Subject: [PATCH] Do not check offsets or utf8 validity in ffi (#510) --- src/array/binary/ffi.rs | 2 +- src/array/binary/mod.rs | 37 +++++++++++++++++++++++++++++++++++-- src/array/utf8/ffi.rs | 2 +- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/array/binary/ffi.rs b/src/array/binary/ffi.rs index 3030dedca31..4ca8d981ebd 100644 --- a/src/array/binary/ffi.rs +++ b/src/array/binary/ffi.rs @@ -44,7 +44,7 @@ impl FromFfi for BinaryArray { validity = validity.map(|x| x.slice(offset, length)) } - Ok(Self::from_data( + Ok(Self::from_data_unchecked( Self::default_data_type(), offsets, values, diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 11b76c37367..036e3f74122 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -1,8 +1,8 @@ use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType}; use super::{ - display_fmt, display_helper, specification::check_offsets, specification::Offset, Array, - GenericBinaryArray, + display_fmt, display_helper, specification::check_offsets, + specification::check_offsets_minimal, specification::Offset, Array, GenericBinaryArray, }; mod ffi; @@ -84,6 +84,39 @@ impl BinaryArray { } } + /// The same as [`BinaryArray::from_data`] but does not check for offsets. + /// # Safety + /// * `offsets` MUST be monotonically increasing + /// # Panics + /// This function panics iff: + /// * The `data_type`'s physical type is not consistent with the offset `O`. + /// * The last element of `offsets` is different from `values.len()`. + /// * The validity is not `None` and its length is different from `offsets.len() - 1`. + pub fn from_data_unchecked( + data_type: DataType, + offsets: Buffer, + values: Buffer, + validity: Option, + ) -> Self { + check_offsets_minimal(&offsets, values.len()); + + if let Some(validity) = &validity { + assert_eq!(offsets.len() - 1, validity.len()); + } + + if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + panic!("BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary") + } + + Self { + data_type, + offsets, + values, + validity, + offset: 0, + } + } + /// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`]. /// # Implementation /// This function is `O(1)`: all data will be shared between both arrays. diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index 98d47efa9dc..c9600d7d9d9 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -33,6 +33,6 @@ impl FromFfi for Utf8Array { validity = validity.map(|x| x.slice(offset, length)) } let data_type = Self::default_data_type(); - Ok(Self::from_data(data_type, offsets, values, validity)) + Ok(Self::from_data_unchecked(data_type, offsets, values, validity)) } }