From cf2ea2edd152095d895428d1e899eddcf0ea1a22 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 8 Jun 2022 15:44:56 +0000 Subject: [PATCH] Improved API --- examples/cow.rs | 44 ++++++++++---------------------------- src/array/primitive/mod.rs | 39 ++++++++++----------------------- 2 files changed, 22 insertions(+), 61 deletions(-) diff --git a/examples/cow.rs b/examples/cow.rs index e0bac6d3702..4b76972a795 100644 --- a/examples/cow.rs +++ b/examples/cow.rs @@ -1,42 +1,20 @@ // This example demos how to operate on arrays in-place. -use arrow2::{ - array::{Array, PrimitiveArray}, - types::NativeType, -}; - -// this function will clone-on-write the array and apply `f` to its values -fn cow_apply<T: NativeType, F: Fn(&mut [T])>(array: &mut dyn Array, f: F) { - // 1. downcast the array to its concrete type - let array = array - .as_any_mut() - .downcast_mut::<PrimitiveArray<T>>() - .unwrap(); - - // 2. empty the mut reference and create a new array on the stack with its contents - let new_array = array.take(); - - // 3. deconstruct the array into its parts - let (dt, values, validity) = new_array.into_inner(); - - // 4. clone-on-write the values - let mut values = values.make_mut(); - - // 5. apply the function over the values - f(&mut values); - - // 6. assign the new values to the array - array.try_assign(dt, values.into(), validity).unwrap(); -} +use arrow2::array::{Array, PrimitiveArray}; fn main() { // say we have have received an array - let mut array = PrimitiveArray::from_vec(vec![1i32, 2]).boxed(); + let mut array: Box<dyn Array> = PrimitiveArray::from_vec(vec![1i32, 2]).boxed(); // we can apply a transformation to its values without allocating a new array as follows: - cow_apply(array.as_mut(), |values: &mut [i32]| { - values.iter_mut().for_each(|x| *x *= 10) - }); + // 1. downcast it to the correct type (known via `array.data_type().to_physical_type()`) + let array = array + .as_any_mut() + .downcast_mut::<PrimitiveArray<i32>>() + .unwrap(); + + // 2.
call `apply_values` with the function to apply over the values + array.apply_values(|x| x.iter_mut().for_each(|x| *x *= 10)); // confirm that it gives the right result :) - assert_eq!(array.as_ref(), PrimitiveArray::from_vec(vec![10i32, 20])); + assert_eq!(array, &PrimitiveArray::from_vec(vec![10i32, 20])); } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index b2c787b1c53..a6d468f2d03 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -253,35 +253,18 @@ impl<T: NativeType> PrimitiveArray<T> { arr } - /// Returns a new [`PrimitiveArray`] by taking everything from this one. - #[must_use] - pub fn take(&mut self) -> Self { - let mut data_type: DataType = T::PRIMITIVE.into(); - std::mem::swap(&mut self.data_type, &mut data_type); - Self { - data_type, - values: std::mem::take(&mut self.values), - validity: std::mem::take(&mut self.validity), - } - } - - /// Tries to assign the arguments to itself. + /// Applies a function `f` to the values of this array, cloning the values + /// iff they are being shared with others /// - /// This function is semantically similar to [`Self::try_new`] but it can be used to populate an existing - /// Array. - /// # Errors - /// Errors iff the `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`] - pub fn try_assign( - &mut self, - data_type: DataType, - values: Buffer<T>, - validity: Option<Bitmap>, - ) -> Result<(), Error> { - check(&data_type, &self.values, &self.validity)?; - self.data_type = data_type; - self.values = values; - self.validity = validity; - Ok(()) + /// This is an API to use clone-on-write + /// # Implementation + /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)` + /// if it is being shared (since it results in a `O(N)` memcopy).
+ pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) { + let values = std::mem::take(&mut self.values); + let mut values = values.make_mut(); + f(&mut values); + self.values = values.into(); } /// Deconstructs this [`PrimitiveArray`] into its internal components