Skip to content

Commit

Permalink
Split out implementations and add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel committed Dec 11, 2023
1 parent 4b24ad9 commit ea99db7
Show file tree
Hide file tree
Showing 21 changed files with 1,309 additions and 273 deletions.
19 changes: 16 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,26 @@ categories.workspace = true

[features]
default = ["arrow-rs", "derive"]
arrow-rs = ["dep:arrow-array", "dep:arrow-buffer"]
arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema"]
derive = ["dep:narrow-derive"]

[dependencies]
arrow-array = { version = "49.0.0", optional = true }
arrow-buffer = { version = "49.0.0", optional = true }
# arrow-array = { version = "49.0.0", optional = true }
# arrow-buffer = { version = "49.0.0", optional = true }
# arrow-schema = { version = "49.0.0", optional = true }
arrow-array = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true }
arrow-buffer = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true }
arrow-schema = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true }
narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true }

[dev-dependencies]
# arrow-cast = { version = "49.0.0", default-features = false, features = ["prettyprint"] }
arrow-cast = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["prettyprint"] }
bytes = "1.5.0"
criterion = { version = "0.5.1", default-features = false }
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", features = ["arrow"] }
# parquet = { version = "49.0.0", default-features = false, features = ["arrow"] }

[profile.bench]
lto = true
Expand All @@ -48,3 +57,7 @@ codegen-units = 1
[[bench]]
name = "narrow"
harness = false

# [[example]]
# name = "parquet"
# required-features = ["arrow-rs", "derive"]
3 changes: 2 additions & 1 deletion narrow-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ const CRATE: &str = "narrow";

static NARROW: Lazy<String> = Lazy::new(|| match proc_macro_crate::crate_name(CRATE) {
Ok(found) => match found {
FoundCrate::Itself => "crate".to_string(),
// Requires `extern crate self as narrow`
FoundCrate::Itself => CRATE.to_string(),
FoundCrate::Name(name) => name,
},
_ => CRATE.to_string(),
Expand Down
9 changes: 8 additions & 1 deletion src/array/fixed_size_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use super::Array;
use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferType, VecBuffer},
buffer::{Buffer, BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
FixedSize, Index, Length,
Expand Down Expand Up @@ -56,6 +56,13 @@ where
{
}

// todo(mbrobbel): buffer_ref traits?
// Exposes the values of a non-nullable array as a plain slice.
// Only implemented for `NULLABLE = false`; the slice is obtained by calling
// `as_slice` on the inner field (`self.0`).
impl<T: FixedSize, Buffer: BufferType> AsRef<[T]> for FixedSizePrimitiveArray<T, false, Buffer> {
/// Returns the array's values as a `&[T]`.
fn as_ref(&self) -> &[T] {
self.0.as_slice()
}
}

impl<T: FixedSize, const NULLABLE: bool, Buffer: BufferType> Default
for FixedSizePrimitiveArray<T, NULLABLE, Buffer>
where
Expand Down
10 changes: 10 additions & 0 deletions src/array/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ where
}
}

/// Appends borrowed string slices to a non-nullable `StringArray`.
///
/// Every `&&str` item is viewed as its bytes and handed to the inner array's
/// `extend`; the `where` clause requires the matching
/// `VariableSizeBinaryArray` to implement `Extend<&[u8]>`.
impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend<&'a &'a str>
    for StringArray<false, OffsetItem, Buffer>
where
    VariableSizeBinaryArray<false, OffsetItem, Buffer>: Extend<&'a [u8]>,
{
    fn extend<I: IntoIterator<Item = &'a &'a str>>(&mut self, iter: I) {
        // Lazily map each string slice to its byte slice; nothing is copied here.
        let byte_slices = iter.into_iter().map(|item| item.as_bytes());
        self.0.extend(byte_slices);
    }
}

impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend<Option<&'a str>>
for StringArray<true, OffsetItem, Buffer>
where
Expand Down
2 changes: 1 addition & 1 deletion src/array/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub trait StructArrayType: ArrayType {
/// The array type that stores items of this struct. Note this differs from
/// the [`ArrayType`] array because that wraps this array. Also note that this
/// has no [`Array`] bound.
type Array<Buffer: BufferType>;
type Array<Buffer: BufferType>; // into<fields> this then requires all arraytype impls to provide a field
}

/// Array for product types.
Expand Down
155 changes: 137 additions & 18 deletions src/arrow/array/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
//! Interop with `arrow-rs` boolean array.
use crate::{array::BooleanArray, bitmap::Bitmap, buffer::BufferType};
use std::sync::Arc;

use crate::{
array::BooleanArray, arrow::ArrowArray, bitmap::Bitmap, buffer::BufferType, nullable::Nullable,
validity::Validity,
};
use arrow_buffer::{BooleanBuffer, NullBuffer};
use arrow_schema::{DataType, Field};

// Implements the `ArrowArray` interop trait for boolean arrays, for both
// values of `NULLABLE`.
impl<const NULLABLE: bool, Buffer: BufferType> ArrowArray for BooleanArray<NULLABLE, Buffer>
where
Bitmap<Buffer>: Validity<NULLABLE>,
{
/// The `arrow-rs` array type paired with this array.
type Array = arrow_array::BooleanArray;

/// Builds a [`Field`] with the given `name` and the [`DataType::Boolean`]
/// data type; its nullability flag is the `NULLABLE` const parameter.
fn as_field(&self, name: &str) -> arrow_schema::Field {
Field::new(name, DataType::Boolean, NULLABLE)
}
}

/// Converts a dynamically typed `arrow-rs` array into a narrow `BooleanArray`.
///
/// The conversion goes through `arrow_array::BooleanArray`: the array data is
/// taken with `to_data`, turned into the typed arrow array, and then passed to
/// the `From<arrow_array::BooleanArray>` impl required by the `where` clause.
// NOTE(review): presumably `arrow-rs` panics here when the data is not of the
// boolean data type — confirm against the `arrow-rs` documentation.
impl<const NULLABLE: bool, Buffer: BufferType> From<Arc<dyn arrow_array::Array>>
    for BooleanArray<NULLABLE, Buffer>
where
    Bitmap<Buffer>: Validity<NULLABLE>,
    Self: From<arrow_array::BooleanArray>,
{
    fn from(value: Arc<dyn arrow_array::Array>) -> Self {
        let array_data = value.to_data();
        let typed_array = arrow_array::BooleanArray::from(array_data);
        Self::from(typed_array)
    }
}

impl<Buffer: BufferType> From<BooleanArray<false, Buffer>> for arrow_array::BooleanArray
where
Expand All @@ -21,27 +49,118 @@ where
}
}

/// Converts an `arrow-rs` boolean array into a non-nullable `BooleanArray`.
///
/// # Panics
///
/// Panics when the input array carries a null buffer. The check is on the
/// presence of the buffer returned by `into_parts`, not on the null count.
impl<Buffer: BufferType> From<arrow_array::BooleanArray> for BooleanArray<false, Buffer>
where
    Bitmap<Buffer>: From<BooleanBuffer>,
{
    fn from(value: arrow_array::BooleanArray) -> Self {
        let (boolean_buffer, nulls_opt) = value.into_parts();
        // A non-nullable array has nowhere to store validity information.
        assert!(nulls_opt.is_none(), "expected array without a null buffer");
        BooleanArray(boolean_buffer.into())
    }
}

/// Converts an `arrow-rs` boolean array into a nullable `BooleanArray`.
///
/// # Panics
///
/// Panics when the input array has no null buffer. The check is on the
/// presence of the buffer returned by `into_parts`, not on the null count.
// Open question: allocate a null buffer instead of panicking, and/or expose
// this as a `TryFrom` conversion?
impl<Buffer: BufferType> From<arrow_array::BooleanArray> for BooleanArray<true, Buffer>
where
    Bitmap<Buffer>: From<BooleanBuffer> + From<NullBuffer>,
{
    fn from(value: arrow_array::BooleanArray) -> Self {
        let (boolean_buffer, nulls_opt) = value.into_parts();
        // The validity bitmap is built from the arrow null buffer, so it must exist.
        let Some(null_buffer) = nulls_opt else {
            panic!("expected array with a null buffer");
        };
        BooleanArray(Nullable {
            data: boolean_buffer.into(),
            validity: null_buffer.into(),
        })
    }
}

#[cfg(test)]
mod tests {
use arrow_array::Array;
use crate::{array::BooleanArray, buffer::ArcBuffer};

use super::*;
use crate::{bitmap::ValidityBitmap, Length};
const INPUT: [bool; 4] = [true, true, false, true];
const INPUT_NULLABLE: [Option<bool>; 4] = [Some(true), None, Some(false), Some(true)];

#[test]
fn convert() {
let input = [true, false, true, true];
let array = input.into_iter().collect::<BooleanArray>();
assert_eq!(array.len(), 4);
let array_arrow: arrow_array::BooleanArray = array.into();
assert_eq!(array_arrow.len(), 4);

let input_nullable = [Some(true), None, Some(false)];
let array_nullable = input_nullable.into_iter().collect::<BooleanArray<true>>();
assert_eq!(array_nullable.len(), 3);
assert_eq!(array_nullable.null_count(), 1);
let array_arrow_nullable: arrow_array::BooleanArray = array_nullable.into();
assert_eq!(array_arrow_nullable.len(), 3);
assert_eq!(array_arrow_nullable.null_count(), 1);
fn from() {
// Checks the narrow -> arrow conversion for three array flavours.

// Non-nullable array using the default type parameters of `BooleanArray`.
let boolean_array = INPUT.into_iter().collect::<BooleanArray>();
assert_eq!(
arrow_array::BooleanArray::from(boolean_array)
.into_iter()
// `flatten` unwraps the optional items so they compare against the plain `bool`s in `INPUT`.
.flatten()
.collect::<Vec<_>>(),
INPUT
);

// Same conversion, with the array stored in an `ArcBuffer`.
let boolean_array_arc = INPUT
.into_iter()
.collect::<BooleanArray<false, ArcBuffer>>();
assert_eq!(
arrow_array::BooleanArray::from(boolean_array_arc)
.into_iter()
.flatten()
.collect::<Vec<_>>(),
INPUT
);

// Nullable array: items are compared as `Option<bool>`, so no `flatten` here.
let boolean_array_nullable = INPUT_NULLABLE.into_iter().collect::<BooleanArray<true>>();
assert_eq!(
arrow_array::BooleanArray::from(boolean_array_nullable)
.into_iter()
.collect::<Vec<_>>(),
INPUT_NULLABLE
);
}

#[test]
#[should_panic(expected = "expected array with a null buffer")]
fn into_nullable() {
    use crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer;

    // An arrow array built from plain `bool`s; converting it into a nullable
    // narrow array is expected to panic.
    let arrow_without_nulls = arrow_array::BooleanArray::from(INPUT.to_vec());
    let _ = BooleanArray::<true, ArrowScalarBuffer>::from(arrow_without_nulls);
}

#[test]
#[should_panic(expected = "expected array without a null buffer")]
fn into_non_nullable() {
    use crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer;

    // An arrow array collected from `Option<bool>` items; converting it into a
    // non-nullable narrow array is expected to panic.
    let arrow_with_nulls = INPUT_NULLABLE
        .into_iter()
        .collect::<arrow_array::BooleanArray>();
    let _ = BooleanArray::<false, ArrowScalarBuffer>::from(arrow_with_nulls);
}

#[test]
fn into() {
    use crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer;

    // arrow -> narrow, non-nullable: the values must round-trip unchanged.
    let arrow_non_nullable = arrow_array::BooleanArray::from(INPUT.to_vec());
    let narrow_non_nullable = BooleanArray::<false, ArrowScalarBuffer>::from(arrow_non_nullable);
    assert_eq!(narrow_non_nullable.into_iter().collect::<Vec<_>>(), INPUT);

    // arrow -> narrow, nullable: the optional values must round-trip unchanged.
    let arrow_nullable = INPUT_NULLABLE
        .into_iter()
        .collect::<arrow_array::BooleanArray>();
    let narrow_nullable = BooleanArray::<true, ArrowScalarBuffer>::from(arrow_nullable);
    assert_eq!(
        narrow_nullable.into_iter().collect::<Vec<_>>(),
        INPUT_NULLABLE
    );
}
}
Loading

0 comments on commit ea99db7

Please sign in to comment.