From d2742752b8766349409904fd2067de2d41128915 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 13 Apr 2023 18:55:06 +0200 Subject: [PATCH] Every other arm, limiting pain as much as possible --- src/array/dictionary/mod.rs | 4 +- src/array/dictionary/mutable.rs | 4 +- src/array/struct_/mod.rs | 8 ++- src/array/union/mod.rs | 4 +- src/compute/cast/dictionary_to.rs | 4 +- src/datatypes/mod.rs | 42 ++++++++---- src/ffi/schema.rs | 30 +++++---- src/io/avro/read/nested.rs | 2 +- src/io/avro/read/schema.rs | 6 +- src/io/ipc/read/schema.rs | 11 ++-- src/io/ipc/write/schema.rs | 8 ++- src/io/json/read/infer_schema.rs | 17 +++-- src/io/json_integration/read/schema.rs | 12 ++-- src/io/orc/read/mod.rs | 7 +- .../read/deserialize/binary/dictionary.rs | 6 +- .../fixed_size_binary/dictionary.rs | 6 +- .../read/deserialize/primitive/dictionary.rs | 6 +- src/io/parquet/read/deserialize/struct_.rs | 6 +- src/io/parquet/read/schema/convert.rs | 26 ++++---- src/io/parquet/read/statistics/mod.rs | 4 +- src/io/parquet/write/mod.rs | 4 +- src/io/parquet/write/pages.rs | 20 ++++-- tests/it/array/dictionary/mod.rs | 29 ++++++--- tests/it/array/fixed_size_binary/mod.rs | 4 +- tests/it/array/growable/list.rs | 4 +- tests/it/array/growable/map.rs | 2 +- tests/it/array/growable/mod.rs | 8 ++- tests/it/array/growable/struct_.rs | 2 +- tests/it/array/growable/union.rs | 6 +- tests/it/array/map/mod.rs | 11 +++- tests/it/array/mod.rs | 22 ++++--- tests/it/array/struct_/iterator.rs | 2 +- tests/it/array/struct_/mod.rs | 2 +- tests/it/array/struct_/mutable.rs | 4 +- tests/it/array/union.rs | 28 ++++---- tests/it/arrow.rs | 30 +++++---- tests/it/compute/cast.rs | 6 +- tests/it/compute/comparison.rs | 4 +- tests/it/compute/sort/row/mod.rs | 4 +- tests/it/compute/take.rs | 2 +- tests/it/ffi/data.rs | 12 ++-- tests/it/io/avro/read.rs | 30 ++++++--- tests/it/io/avro/write.rs | 38 ++++++++--- tests/it/io/ipc/mmap.rs | 6 +- tests/it/io/json/read.rs | 9 ++- tests/it/io/json/write.rs | 50 +++++++------- tests/it/io/ndjson/mod.rs | 62 +++++++++++------- tests/it/io/ndjson/read.rs | 29 ++++++--- tests/it/io/parquet/mod.rs | 65 +++++++++++++------ tests/it/io/print.rs | 4 +- tests/it/scalar/map.rs | 20 ++++-- tests/it/scalar/struct_.rs | 6 +- 52 files changed, 460 insertions(+), 278 deletions(-) diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index f7d4a0f43d7..91d358e4484 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -1,4 +1,4 @@ -use std::hint::unreachable_unchecked; +use std::{hint::unreachable_unchecked, sync::Arc}; use crate::{ bitmap::{ @@ -290,7 +290,7 @@ impl DictionaryArray { } pub(crate) fn default_data_type(values_datatype: DataType) -> DataType { - DataType::Dictionary(K::KEY_TYPE, Box::new(values_datatype), false) + DataType::Dictionary(K::KEY_TYPE, Arc::new(values_datatype), false) } /// Slices this [`DictionaryArray`]. diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index 444de34bcc4..98cd689afd8 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -55,7 +55,7 @@ impl From for MutableDictionaryArray Self { data_type: DataType::Dictionary( K::KEY_TYPE, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), false, ), keys: MutablePrimitiveArray::::new(), @@ -72,7 +72,7 @@ impl MutableDictionaryArray { Self { data_type: DataType::Dictionary( K::KEY_TYPE, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), false, ), keys: MutablePrimitiveArray::::new(), diff --git a/src/array/struct_/mod.rs b/src/array/struct_/mod.rs index 767ba8242fc..ed4e89aa47c 100644 --- a/src/array/struct_/mod.rs +++ b/src/array/struct_/mod.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::{ bitmap::Bitmap, datatypes::{DataType, Field}, @@ -28,7 +30,7 @@ pub use mutable::*; /// Field::new("c", DataType::Int32, false), /// ]; /// -/// let array = StructArray::new(DataType::Struct(fields), vec![boolean, int], None); +/// let array = StructArray::new(DataType::Struct(std::sync::Arc::new(fields)), vec![boolean, int], None); /// ``` #[derive(Clone)] pub struct StructArray { @@ -69,7 +71,7 @@ impl StructArray { .try_for_each(|(index, (data_type, child))| { if data_type != child { Err(Error::oos(format!( - "The children DataTypes of a StructArray must equal the children data types. + "The children DataTypes of a StructArray must equal the children data types. However, the field {index} has data type {data_type:?} but the value has data type {child:?}" ))) } else { @@ -153,7 +155,7 @@ impl StructArray { impl StructArray { /// Deconstructs the [`StructArray`] into its individual components. #[must_use] - pub fn into_data(self) -> (Vec, Vec>, Option) { + pub fn into_data(self) -> (Arc>, Vec>, Option) { let Self { data_type, values, diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs index e3e664916f8..624a6d93bc3 100644 --- a/src/array/union/mod.rs +++ b/src/array/union/mod.rs @@ -73,7 +73,7 @@ impl UnionArray { .try_for_each(|(index, (data_type, child))| { if data_type != child { Err(Error::oos(format!( - "The children DataTypes of a UnionArray must equal the children data types. + "The children DataTypes of a UnionArray must equal the children data types. However, the field {index} has data type {data_type:?} but the value has data type {child:?}" ))) } else { @@ -352,7 +352,7 @@ impl UnionArray { fn try_get_all(data_type: &DataType) -> Result { match data_type.to_logical_type() { DataType::Union(fields, ids, mode) => { - Ok((fields, ids.as_ref().map(|x| x.as_ref()), *mode)) + Ok((fields, ids.as_ref().map(|x| x.as_slice()), *mode)) } _ => Err(Error::oos( "The UnionArray requires a logical type of DataType::Union", diff --git a/src/compute/cast/dictionary_to.rs b/src/compute/cast/dictionary_to.rs index 101669f6442..39533bdb065 100644 --- a/src/compute/cast/dictionary_to.rs +++ b/src/compute/cast/dictionary_to.rs @@ -89,7 +89,7 @@ where } else { let data_type = DataType::Dictionary( K2::KEY_TYPE, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), is_ordered, ); // Safety: this is safe because given a type `T` that fits in a `usize`, casting it to type `P` either overflows or also fits in a `usize` @@ -116,7 +116,7 @@ where } else { let data_type = DataType::Dictionary( K2::KEY_TYPE, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), is_ordered, ); // some of the values may not fit in `usize` and thus this needs to be checked diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index dc4f187a57e..f652a60f4ab 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -147,10 +147,10 @@ pub enum DataType { /// A list of some logical data type whose offsets are represented as [`i64`]. LargeList(Arc), /// A nested [`DataType`] with a given number of [`Field`]s. - Struct(Vec), + Struct(Arc>), /// A nested datatype that can represent slots of differing types. /// Third argument represents mode - Union(Vec, Option>, UnionMode), + Union(Arc>, Option>>, UnionMode), /// A nested type that is represented as /// /// List> @@ -189,7 +189,7 @@ pub enum DataType { /// arrays or a limited set of primitive types as integers. /// /// The `bool` value indicates the `Dictionary` is sorted if set to `true`. - Dictionary(IntegerType, Box, bool), + Dictionary(IntegerType, Arc, bool), /// Decimal value with precision and scale /// precision is the number of digits in the number and /// scale is the number of decimal places. @@ -198,7 +198,7 @@ pub enum DataType { /// Decimal backed by 256 bits Decimal256(usize, usize), /// Extension type. - Extension(String, Box, Option), + Extension(String, Arc, Option>), } #[cfg(feature = "arrow")] @@ -239,15 +239,29 @@ impl From for arrow_schema::DataType { DataType::LargeList(f) => { Self::LargeList(Box::new(Arc::unwrap_or_clone_polyfill(f).into())) } - DataType::Struct(f) => Self::Struct(f.into_iter().map(Into::into).collect()), + DataType::Struct(f) => Self::Struct( + Arc::unwrap_or_clone_polyfill(f) + .into_iter() + .map(Into::into) + .collect(), + ), DataType::Union(fields, Some(ids), mode) => { - let ids = ids.into_iter().map(|x| x as _).collect(); - let fields = fields.into_iter().map(Into::into).collect(); + let ids = Arc::unwrap_or_clone_polyfill(ids) + .into_iter() + .map(|x| x as _) + .collect(); + let fields = Arc::unwrap_or_clone_polyfill(fields) + .into_iter() + .map(Into::into) + .collect(); Self::Union(fields, ids, mode.into()) } DataType::Union(fields, None, mode) => { let ids = (0..fields.len() as i8).collect(); - let fields = fields.into_iter().map(Into::into).collect(); + let fields = Arc::unwrap_or_clone_polyfill(fields) + .into_iter() + .map(Into::into) + .collect(); Self::Union(fields, ids, mode.into()) } DataType::Map(f, ordered) => { @@ -255,11 +269,11 @@ impl From for arrow_schema::DataType { } DataType::Dictionary(key, value, _) => Self::Dictionary( Box::new(DataType::from(key).into()), - Box::new((*value).into()), + Box::new(Arc::unwrap_or_clone_polyfill(value).into()), ), DataType::Decimal(precision, scale) => Self::Decimal128(precision as _, scale as _), DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), - DataType::Extension(_, d, _) => (*d).into(), + DataType::Extension(_, d, _) => Arc::unwrap_or_clone_polyfill(d).into(), } } } @@ -299,10 +313,10 @@ impl From for DataType { Self::FixedSizeList(Arc::new((*f).into()), size as _) } DataType::LargeList(f) => Self::LargeList(Arc::new((*f).into())), - DataType::Struct(f) => Self::Struct(f.into_iter().map(Into::into).collect()), + DataType::Struct(f) => Self::Struct(Arc::new(f.into_iter().map(Into::into).collect())), DataType::Union(fields, ids, mode) => { - let ids = ids.into_iter().map(|x| x as _).collect(); - let fields = fields.into_iter().map(Into::into).collect(); + let ids = Arc::new(ids.into_iter().map(|x| x as _).collect()); + let fields = Arc::new(fields.into_iter().map(Into::into).collect()); Self::Union(fields, Some(ids), mode.into()) } DataType::Map(f, ordered) => Self::Map(std::sync::Arc::new((*f).into()), ordered), @@ -318,7 +332,7 @@ impl From for DataType { DataType::UInt64 => IntegerType::UInt64, d => panic!("illegal dictionary key type: {d}"), }; - Self::Dictionary(key, Box::new((*value).into()), false) + Self::Dictionary(key, Arc::new((*value).into()), false) } DataType::Decimal128(precision, scale) => Self::Decimal(precision as _, scale as _), DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index b36addc21ff..05bbddabb0c 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -87,7 +87,7 @@ impl ArrowSchema { if let Some(extension_metadata) = extension_metadata { metadata.insert( "ARROW:extension:metadata".to_string(), - extension_metadata.clone(), + extension_metadata.to_string(), ); } @@ -193,14 +193,18 @@ pub(crate) unsafe fn to_field(schema: &ArrowSchema) -> Result { let indices = to_integer_type(schema.format())?; let values = to_field(dictionary)?; let is_ordered = schema.flags & 1 == 1; - DataType::Dictionary(indices, Box::new(values.data_type().clone()), is_ordered) + DataType::Dictionary( + indices, + std::sync::Arc::new(values.data_type().clone()), + is_ordered, + ) } else { to_data_type(schema)? }; let (metadata, extension) = unsafe { metadata_from_bytes(schema.metadata) }; let data_type = if let Some((name, extension_metadata)) = extension { - DataType::Extension(name, Box::new(data_type), extension_metadata) + DataType::Extension(name, Arc::new(data_type), extension_metadata.map(Arc::new)) } else { data_type }; @@ -276,7 +280,7 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result { let children = (0..schema.n_children as usize) .map(|x| to_field(schema.child(x))) .collect::>>()?; - DataType::Struct(children) + DataType::Struct(Arc::new(children)) } other => { match other.splitn(2, ':').collect::>()[..] { @@ -378,7 +382,7 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result { let fields = (0..schema.n_children as usize) .map(|x| to_field(schema.child(x))) .collect::>>()?; - DataType::Union(fields, Some(type_ids), mode) + DataType::Union(Arc::new(fields), Some(Arc::new(type_ids)), mode) } _ => { return Err(Error::OutOfSpec(format!( @@ -576,40 +580,40 @@ mod tests { DataType::List(Arc::new(Field::new("example", DataType::Boolean, false))), DataType::FixedSizeList(Arc::new(Field::new("example", DataType::Boolean, false)), 2), DataType::LargeList(Arc::new(Field::new("example", DataType::Boolean, false))), - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new( "b", DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), true, ), - ]), + ])), DataType::Map( std::sync::Arc::new(Field::new("a", DataType::Int64, true)), true, ), DataType::Union( - vec![ + Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new( "b", DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), true, ), - ], - Some(vec![1, 2]), + ]), + Some(Arc::new(vec![1, 2])), UnionMode::Dense, ), DataType::Union( - vec![ + Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new( "b", DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), true, ), - ], - Some(vec![0, 1]), + ]), + Some(Arc::new(vec![0, 1])), UnionMode::Sparse, ), ]; diff --git a/src/io/avro/read/nested.rs b/src/io/avro/read/nested.rs index 056d9a8f836..cc752e976bb 100644 --- a/src/io/avro/read/nested.rs +++ b/src/io/avro/read/nested.rs @@ -129,7 +129,7 @@ impl FixedItemsUtf8Dictionary { Self { data_type: DataType::Dictionary( IntegerType::Int32, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), false, ), keys: MutablePrimitiveArray::::with_capacity(capacity), diff --git a/src/io/avro/read/schema.rs b/src/io/avro/read/schema.rs index 07b988fd71e..9c70b48e405 100644 --- a/src/io/avro/read/schema.rs +++ b/src/io/avro/read/schema.rs @@ -105,7 +105,7 @@ fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> .iter() .map(|s| schema_to_field(s, None, Metadata::default())) .collect::>>()?; - DataType::Union(fields, None, UnionMode::Dense) + DataType::Union(Arc::new(fields), None, UnionMode::Dense) } } AvroSchema::Record(Record { fields, .. }) => { @@ -119,12 +119,12 @@ fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> schema_to_field(&field.schema, Some(&field.name), props) }) .collect::>()?; - DataType::Struct(fields) + DataType::Struct(std::sync::Arc::new(fields)) } AvroSchema::Enum { .. } => { return Ok(Field::new( name.unwrap_or_default(), - DataType::Dictionary(IntegerType::Int32, Box::new(DataType::Utf8), false), + DataType::Dictionary(IntegerType::Int32, Arc::new(DataType::Utf8), false), false, )) } diff --git a/src/io/ipc/read/schema.rs b/src/io/ipc/read/schema.rs index b625f19d484..4d68f1281ad 100644 --- a/src/io/ipc/read/schema.rs +++ b/src/io/ipc/read/schema.rs @@ -133,7 +133,10 @@ fn deserialize_union(union_: UnionRef, field: FieldRef) -> Result<(DataType, Ipc fields: ipc_fields, dictionary_id: None, }; - Ok((DataType::Union(fields, ids, mode), ipc_field)) + Ok(( + DataType::Union(Arc::new(fields), ids.map(Arc::new), mode), + ipc_field, + )) } fn deserialize_map(map: MapRef, field: FieldRef) -> Result<(DataType, IpcField)> { @@ -172,7 +175,7 @@ fn deserialize_struct(field: FieldRef) -> Result<(DataType, IpcField)> { fields: ipc_fields, dictionary_id: None, }; - Ok((DataType::Struct(fields), ipc_field)) + Ok((DataType::Struct(std::sync::Arc::new(fields)), ipc_field)) } fn deserialize_list(field: FieldRef) -> Result<(DataType, IpcField)> { @@ -252,7 +255,7 @@ fn get_data_type( let (inner, mut ipc_field) = get_data_type(field, extension, false)?; ipc_field.dictionary_id = Some(dictionary.id()?); return Ok(( - DataType::Dictionary(index_type, Box::new(inner), dictionary.is_ordered()?), + DataType::Dictionary(index_type, Arc::new(inner), dictionary.is_ordered()?), ipc_field, )); } @@ -262,7 +265,7 @@ fn get_data_type( let (name, metadata) = extension; let (data_type, fields) = get_data_type(field, None, false)?; return Ok(( - DataType::Extension(name, Box::new(data_type), metadata), + DataType::Extension(name, Arc::new(data_type), metadata.map(Arc::new)), fields, )); } diff --git a/src/io/ipc/write/schema.rs b/src/io/ipc/write/schema.rs index 5c35c8104f3..e1fa2b97aba 100644 --- a/src/io/ipc/write/schema.rs +++ b/src/io/ipc/write/schema.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow_format::ipc::planus::Builder; use crate::datatypes::{ @@ -71,14 +73,14 @@ fn write_metadata(metadata: &Metadata, kv_vec: &mut Vec, + metadata: &Option>, kv_vec: &mut Vec, ) { // metadata if let Some(metadata) = metadata { let entry = arrow_format::ipc::KeyValue { key: Some("ARROW:extension:metadata".to_string()), - value: Some(metadata.clone()), + value: Some(metadata.to_string()), }; kv_vec.push(entry); } @@ -247,7 +249,7 @@ fn serialize_type(data_type: &DataType) -> arrow_format::ipc::Type { UnionMode::Dense => ipc::UnionMode::Dense, UnionMode::Sparse => ipc::UnionMode::Sparse, }, - type_ids: type_ids.clone(), + type_ids: type_ids.as_ref().map(|type_ids| type_ids.to_vec()), })), Map(_, keys_sorted) => ipc::Type::Map(Box::new(ipc::Map { keys_sorted: *keys_sorted, diff --git a/src/io/json/read/infer_schema.rs b/src/io/json/read/infer_schema.rs index 13f0c50360f..e036c802885 100644 --- a/src/io/json/read/infer_schema.rs +++ b/src/io/json/read/infer_schema.rs @@ -1,4 +1,5 @@ use std::borrow::Borrow; +use std::sync::Arc; use indexmap::map::IndexMap as HashMap; use indexmap::set::IndexSet as HashSet; @@ -82,7 +83,7 @@ fn infer_object(inner: &HashMap) -> Result { Ok(Field::new(key, dt, true)) }) .collect::>>()?; - Ok(DataType::Struct(fields)) + Ok(DataType::Struct(std::sync::Arc::new(fields))) } fn infer_array(values: &[Value]) -> Result { @@ -141,7 +142,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> DataType // all are structs => union of all fields (that may have equal names) let fields = datatypes.iter().fold(vec![], |mut acc, dt| { if let Struct(new_fields) = dt.borrow() { - acc.extend(new_fields); + acc.extend(new_fields.as_slice()); }; acc }); @@ -170,7 +171,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> DataType Field::new(name, coerce_data_type(&dts), true) }) .collect(); - return Struct(fields); + return Struct(Arc::new(fields)); } else if datatypes.len() > 2 { return Utf8; } @@ -214,11 +215,17 @@ mod test { List(std::sync::Arc::new(Field::new(ITEM_NAME, Float64, true))), ); assert_eq!( - coerce_data_type(&[Float64, List(std::sync::Arc::new(Field::new(ITEM_NAME, Int64, true)))]), + coerce_data_type(&[ + Float64, + List(std::sync::Arc::new(Field::new(ITEM_NAME, Int64, true))) + ]), List(std::sync::Arc::new(Field::new(ITEM_NAME, Float64, true))), ); assert_eq!( - coerce_data_type(&[Int64, List(std::sync::Arc::new(Field::new(ITEM_NAME, Int64, true)))]), + coerce_data_type(&[ + Int64, + List(std::sync::Arc::new(Field::new(ITEM_NAME, Int64, true))) + ]), List(std::sync::Arc::new(Field::new(ITEM_NAME, Int64, true))), ); // boolean and number are incompatible, return utf8 diff --git a/src/io/json_integration/read/schema.rs b/src/io/json_integration/read/schema.rs index 66b88f1f8b7..c2b08762570 100644 --- a/src/io/json_integration/read/schema.rs +++ b/src/io/json_integration/read/schema.rs @@ -259,7 +259,7 @@ fn to_data_type(item: &Value, mut children: Vec) -> Result { )); } } - "struct" => DataType::Struct(children), + "struct" => DataType::Struct(Arc::new(children)), "union" => { let mode = if let Some(Value::String(mode)) = item.get("mode") { UnionMode::sparse(mode == "SPARSE") @@ -267,11 +267,13 @@ fn to_data_type(item: &Value, mut children: Vec) -> Result { return Err(Error::OutOfSpec("union requires mode".to_string())); }; let ids = if let Some(Value::Array(ids)) = item.get("typeIds") { - Some(ids.iter().map(|x| x.as_i64().unwrap() as i32).collect()) + Some(Arc::new( + ids.iter().map(|x| x.as_i64().unwrap() as i32).collect(), + )) } else { return Err(Error::OutOfSpec("union requires ids".to_string())); }; - DataType::Union(children, ids, mode) + DataType::Union(Arc::new(children), ids, mode) } "map" => { let sorted_keys = if let Some(Value::Bool(sorted_keys)) = item.get("keysSorted") { @@ -370,7 +372,7 @@ fn deserialize_field(value: &Value) -> Result { let data_type = to_data_type(type_, children)?; let data_type = if let Some((name, metadata)) = extension { - DataType::Extension(name, Box::new(data_type), metadata) + DataType::Extension(name, Arc::new(data_type), metadata.map(Arc::new)) } else { data_type }; @@ -392,7 +394,7 @@ fn deserialize_field(value: &Value) -> Result { )); } }; - DataType::Dictionary(index_type, Box::new(data_type), is_ordered) + DataType::Dictionary(index_type, Arc::new(data_type), is_ordered) } else { data_type }; diff --git a/src/io/orc/read/mod.rs b/src/io/orc/read/mod.rs index 3fe4abb7f63..32c4465c0bf 100644 --- a/src/io/orc/read/mod.rs +++ b/src/io/orc/read/mod.rs @@ -1,9 +1,10 @@ //! APIs to read from [ORC format](https://orc.apache.org). use std::io::Read; +use std::sync::Arc; use crate::array::{Array, BinaryArray, BooleanArray, Int64Array, PrimitiveArray, Utf8Array}; use crate::bitmap::{Bitmap, MutableBitmap}; -use crate::datatypes::{DataType, Field, Schema}; +use crate::datatypes::{ArcExt, DataType, Field, Schema}; use crate::error::Error; use crate::offset::{Offset, Offsets}; use crate::types::NativeType; @@ -21,7 +22,7 @@ pub fn infer_schema(footer: &Footer) -> Result { let dt = infer_dt(&footer.types[0], types)?; if let DataType::Struct(fields) = dt { - Ok(fields.into()) + Ok(Arc::unwrap_or_clone_polyfill(fields).into()) } else { Err(Error::ExternalFormat( "ORC root type must be a struct".to_string(), @@ -57,7 +58,7 @@ fn infer_dt(type_: &Type, types: &[Type]) -> Result { .map(|dt| Field::new(name, dt, true)) }) .collect::, Error>>()?; - DataType::Struct(sub_types) + DataType::Struct(Arc::new(sub_types)) } kind => return Err(Error::nyi(format!("Reading {kind:?} from ORC"))), }; diff --git a/src/io/parquet/read/deserialize/binary/dictionary.rs b/src/io/parquet/read/deserialize/binary/dictionary.rs index 6f883528ef8..df8d3988c8c 100644 --- a/src/io/parquet/read/deserialize/binary/dictionary.rs +++ b/src/io/parquet/read/deserialize/binary/dictionary.rs @@ -1,11 +1,11 @@ -use std::collections::VecDeque; +use std::{collections::VecDeque, sync::Arc}; use parquet2::page::DictPage; use crate::{ array::{Array, BinaryArray, DictionaryArray, DictionaryKey, Utf8Array}, bitmap::MutableBitmap, - datatypes::{DataType, PhysicalType}, + datatypes::{ArcExt, DataType, PhysicalType}, error::Result, io::parquet::read::deserialize::nested_utils::{InitNested, NestedState}, offset::Offset, @@ -53,7 +53,7 @@ where fn read_dict(data_type: DataType, dict: &DictPage) -> Box { let data_type = match data_type { - DataType::Dictionary(_, values, _) => *values, + DataType::Dictionary(_, values, _) => Arc::unwrap_or_clone_polyfill(values), _ => data_type, }; diff --git a/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs b/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs index 680834ad270..27ef41312e2 100644 --- a/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs +++ b/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs @@ -1,11 +1,11 @@ -use std::collections::VecDeque; +use std::{collections::VecDeque, sync::Arc}; use parquet2::page::DictPage; use crate::{ array::{Array, DictionaryArray, DictionaryKey, FixedSizeBinaryArray}, bitmap::MutableBitmap, - datatypes::DataType, + datatypes::{ArcExt, DataType}, error::Result, io::parquet::read::deserialize::nested_utils::{InitNested, NestedState}, }; @@ -48,7 +48,7 @@ where fn read_dict(data_type: DataType, dict: &DictPage) -> Box { let data_type = match data_type { - DataType::Dictionary(_, values, _) => *values, + DataType::Dictionary(_, values, _) => Arc::unwrap_or_clone_polyfill(values), _ => data_type, }; diff --git a/src/io/parquet/read/deserialize/primitive/dictionary.rs b/src/io/parquet/read/deserialize/primitive/dictionary.rs index 16fec526112..b46cc6f7286 100644 --- a/src/io/parquet/read/deserialize/primitive/dictionary.rs +++ b/src/io/parquet/read/deserialize/primitive/dictionary.rs @@ -1,11 +1,11 @@ -use std::collections::VecDeque; +use std::{collections::VecDeque, sync::Arc}; use parquet2::{page::DictPage, types::NativeType as ParquetNativeType}; use crate::{ array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}, bitmap::MutableBitmap, - datatypes::DataType, + datatypes::{ArcExt, DataType}, error::Result, types::NativeType, }; @@ -24,7 +24,7 @@ where F: Copy + Fn(P) -> T, { let data_type = match data_type { - DataType::Dictionary(_, values, _) => *values, + DataType::Dictionary(_, values, _) => Arc::unwrap_or_clone_polyfill(values), _ => data_type, }; let values = deserialize_plain(&dict.buffer, op); diff --git a/src/io/parquet/read/deserialize/struct_.rs b/src/io/parquet/read/deserialize/struct_.rs index dd5776948cd..72209260df7 100644 --- a/src/io/parquet/read/deserialize/struct_.rs +++ b/src/io/parquet/read/deserialize/struct_.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::array::{Array, StructArray}; use crate::datatypes::{DataType, Field}; use crate::error::Error; @@ -7,12 +9,12 @@ use super::nested_utils::{NestedArrayIter, NestedState}; /// An iterator adapter over [`NestedArrayIter`] assumed to be encoded as Struct arrays pub struct StructIterator<'a> { iters: Vec>, - fields: Vec, + fields: Arc>, } impl<'a> StructIterator<'a> { /// Creates a new [`StructIterator`] with `iters` and `fields`. - pub fn new(iters: Vec>, fields: Vec) -> Self { + pub fn new(iters: Vec>, fields: Arc>) -> Self { assert_eq!(iters.len(), fields.len()); Self { iters, fields } } diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index 1d6442ca52a..04dcf2d6547 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -238,7 +238,7 @@ fn to_struct(fields: &[ParquetType]) -> Option { if fields.is_empty() { None } else { - Some(DataType::Struct(fields)) + Some(DataType::Struct(std::sync::Arc::new(fields))) } } @@ -623,10 +623,10 @@ mod tests { // }; // } { - let arrow_struct = DataType::Struct(vec![ + let arrow_struct = DataType::Struct(Arc::new(vec![ Field::new("str", DataType::Utf8, false), Field::new("num", DataType::Int32, false), - ]); + ])); arrow_fields.push(Field::new( "my_list", DataType::List(std::sync::Arc::new(Field::new( @@ -646,7 +646,8 @@ mod tests { // } // Special case: group is named array { - let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]); + let arrow_struct = + DataType::Struct(Arc::new(vec![Field::new("str", DataType::Utf8, false)])); arrow_fields.push(Field::new( "my_list", DataType::List(std::sync::Arc::new(Field::new("array", arrow_struct, true))), @@ -662,7 +663,8 @@ mod tests { // } // Special case: group named ends in _tuple { - let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]); + let arrow_struct = + DataType::Struct(Arc::new(vec![Field::new("str", DataType::Utf8, false)])); arrow_fields.push(Field::new( "my_list", DataType::List(std::sync::Arc::new(Field::new( @@ -784,10 +786,10 @@ mod tests { fn test_nested_schema() -> Result<()> { let mut arrow_fields = Vec::new(); { - let group1_fields = vec![ + let group1_fields = Arc::new(vec![ Field::new("leaf1", DataType::Boolean, false), Field::new("leaf2", DataType::Int32, false), - ]; + ]); let group1_struct = Field::new("group1", DataType::Struct(group1_fields), false); arrow_fields.push(group1_struct); @@ -822,7 +824,7 @@ mod tests { "innerGroup", DataType::List(std::sync::Arc::new(Field::new( "innerGroup", - DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]), + DataType::Struct(Arc::new(vec![Field::new("leaf3", DataType::Int32, true)])), true, ))), true, @@ -832,10 +834,10 @@ mod tests { "outerGroup", DataType::List(std::sync::Arc::new(Field::new( "outerGroup", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("leaf2", DataType::Int32, true), inner_group_list, - ]), + ])), true, ))), true, @@ -1016,7 +1018,7 @@ mod tests { ), Field::new( "struct", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("bools", DataType::Boolean, false), Field::new("uint32", DataType::UInt32, false), Field::new( @@ -1028,7 +1030,7 @@ mod tests { ))), false, ), - ]), + ])), false, ), Field::new("dictionary_strings", DataType::Utf8, false), diff --git a/src/io/parquet/read/statistics/mod.rs b/src/io/parquet/read/statistics/mod.rs index b2f1766c015..7c609228f5f 100644 --- a/src/io/parquet/read/statistics/mod.rs +++ b/src/io/parquet/read/statistics/mod.rs @@ -204,12 +204,12 @@ fn make_mutable(data_type: &DataType, capacity: usize) -> Result DataType { if let DataType::Struct(fields) = data_type.to_logical_type() { - DataType::Struct( + DataType::Struct(Arc::new( fields .iter() .map(|f| Field::new(&f.name, create_dt(&f.data_type), f.is_nullable)) .collect(), - ) + )) } else if let DataType::Map(f, ordered) = data_type.to_logical_type() { DataType::Map( Arc::new(Field::new(&f.name, create_dt(&f.data_type), f.is_nullable)), diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 4b86d707f68..2e60d0b2790 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -707,7 +707,7 @@ fn transverse_recursive T + Clone>( } Struct => { if let DataType::Struct(fields) = data_type.to_logical_type() { - for field in fields { + for field in fields.as_slice() { transverse_recursive(&field.data_type, map.clone(), encodings) } } else { @@ -717,7 +717,7 @@ fn transverse_recursive T + Clone>( Map => { if let DataType::Map(field, _) = data_type.to_logical_type() { if let DataType::Struct(fields) = field.data_type.to_logical_type() { - for field in fields { + for field in fields.as_slice() { transverse_recursive(&field.data_type, map.clone(), encodings) } } else { diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs index eae4b70250b..0644f6b48d7 100644 --- a/src/io/parquet/write/pages.rs +++ b/src/io/parquet/write/pages.rs @@ -258,6 +258,8 @@ pub fn array_to_columns + Send + Sync>( #[cfg(test)] mod tests { + use std::sync::Arc; + use parquet2::schema::types::{GroupLogicalType, PrimitiveConvertedType, PrimitiveLogicalType}; use parquet2::schema::Repetition; @@ -280,7 +282,7 @@ mod tests { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), ); @@ -344,7 +346,7 @@ mod tests { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), ); @@ -355,7 +357,7 @@ mod tests { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![Box::new(array.clone()), Box::new(array)], None, ); @@ -447,13 +449,17 @@ mod tests { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), ); let array = ListArray::new( - DataType::List(std::sync::Arc::new(Field::new("l", array.data_type().clone(), true))), + DataType::List(std::sync::Arc::new(Field::new( + "l", + array.data_type().clone(), + true, + ))), vec![0i32, 2, 4].try_into().unwrap(), Box::new(array), None, @@ -540,10 +546,10 @@ mod tests { #[test] fn test_map() { - let kv_type = DataType::Struct(vec![ + let kv_type = DataType::Struct(Arc::new(vec![ Field::new("k", DataType::Utf8, false), Field::new("v", DataType::Int32, false), - ]); + ])); let kv_field = Field::new("kv", kv_type.clone(), false); let map_type = DataType::Map(std::sync::Arc::new(kv_field), false); diff --git a/tests/it/array/dictionary/mod.rs b/tests/it/array/dictionary/mod.rs index 0ee0c374764..0a2e882465a 100644 --- a/tests/it/array/dictionary/mod.rs +++ b/tests/it/array/dictionary/mod.rs @@ -1,12 +1,17 @@ mod mutable; +use std::sync::Arc; + use arrow2::{array::*, datatypes::DataType}; #[test] fn try_new_ok() { let values = Utf8Array::::from_slice(["a", "aa"]); - let data_type = - DataType::Dictionary(i32::KEY_TYPE, Box::new(values.data_type().clone()), false); + let data_type = DataType::Dictionary( + i32::KEY_TYPE, + std::sync::Arc::new(values.data_type().clone()), + false, + ); let array = DictionaryArray::try_new( data_type, PrimitiveArray::from_vec(vec![1, 0]), @@ -27,8 +32,11 @@ fn try_new_ok() { #[test] fn try_new_incorrect_key() { let values = Utf8Array::::from_slice(["a", "aa"]); - let data_type = - DataType::Dictionary(i16::KEY_TYPE, Box::new(values.data_type().clone()), false); + let data_type = DataType::Dictionary( + i16::KEY_TYPE, + std::sync::Arc::new(values.data_type().clone()), + false, + ); let r = DictionaryArray::try_new( data_type, @@ -47,8 +55,11 @@ fn try_new_nulls() { let value: &[&str] = &[]; let values = Utf8Array::::from_slice(value); - let data_type = - DataType::Dictionary(u32::KEY_TYPE, Box::new(values.data_type().clone()), false); + let data_type = DataType::Dictionary( + u32::KEY_TYPE, + std::sync::Arc::new(values.data_type().clone()), + false, + ); let r = DictionaryArray::try_new(data_type, keys, values.boxed()).is_ok(); assert!(r); @@ -72,7 +83,7 @@ fn try_new_incorrect_dt() { #[test] fn try_new_incorrect_values_dt() { let values = Utf8Array::::from_slice(["a", "aa"]); - let data_type = DataType::Dictionary(i32::KEY_TYPE, Box::new(DataType::LargeUtf8), false); + let data_type = DataType::Dictionary(i32::KEY_TYPE, Arc::new(DataType::LargeUtf8), false); let r = DictionaryArray::try_new( data_type, @@ -106,7 +117,7 @@ fn try_new_out_of_bounds_neg() { #[test] fn new_null() { - let dt = DataType::Dictionary(i16::KEY_TYPE, Box::new(DataType::Int32), false); + let dt = DataType::Dictionary(i16::KEY_TYPE, Arc::new(DataType::Int32), false); let array = DictionaryArray::::new_null(dt, 2); assert_eq!(format!("{array:?}"), "DictionaryArray[None, None]"); @@ -114,7 +125,7 @@ fn new_null() { #[test] fn new_empty() { - let dt = DataType::Dictionary(i16::KEY_TYPE, Box::new(DataType::Int32), false); + let dt = DataType::Dictionary(i16::KEY_TYPE, Arc::new(DataType::Int32), false); let array = DictionaryArray::::new_empty(dt); assert_eq!(format!("{array:?}"), "DictionaryArray[]"); diff --git a/tests/it/array/fixed_size_binary/mod.rs b/tests/it/array/fixed_size_binary/mod.rs index c5524248ff5..cf322086a2d 100644 --- a/tests/it/array/fixed_size_binary/mod.rs +++ b/tests/it/array/fixed_size_binary/mod.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{array::FixedSizeBinaryArray, bitmap::Bitmap, buffer::Buffer, datatypes::DataType}; mod mutable; @@ -89,7 +91,7 @@ fn to() { let extension = DataType::Extension( "a".to_string(), - Box::new(DataType::FixedSizeBinary(2)), + Arc::new(DataType::FixedSizeBinary(2)), None, ); let _ = a.to(extension); diff --git a/tests/it/array/growable/list.rs b/tests/it/array/growable/list.rs index 45006b6e3d6..5709aaf929a 100644 --- a/tests/it/array/growable/list.rs +++ b/tests/it/array/growable/list.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::{ growable::{Growable, GrowableList}, @@ -23,7 +25,7 @@ fn extension() { let array = create_list_array(data); let data_type = - DataType::Extension("ext".to_owned(), Box::new(array.data_type().clone()), None); + DataType::Extension("ext".to_owned(), Arc::new(array.data_type().clone()), None); let array_ext = ListArray::new( data_type, array.offsets().clone(), diff --git a/tests/it/array/growable/map.rs b/tests/it/array/growable/map.rs index e98b98903b3..4025e6b52b7 100644 --- a/tests/it/array/growable/map.rs +++ b/tests/it/array/growable/map.rs @@ -29,7 +29,7 @@ fn some_values() -> (DataType, Vec>) { Field::new("key", DataType::Utf8, true), Field::new("val", DataType::Int32, true), ]; - (DataType::Struct(fields), vec![strings, ints]) + (DataType::Struct(std::sync::Arc::new(fields)), vec![strings, ints]) } #[test] diff --git a/tests/it/array/growable/mod.rs b/tests/it/array/growable/mod.rs index d4b034a13e6..d614f1b411e 100644 --- a/tests/it/array/growable/mod.rs +++ b/tests/it/array/growable/mod.rs @@ -11,6 +11,8 @@ mod struct_; mod union; mod utf8; +use std::sync::Arc; + use arrow2::array::growable::make_growable; use arrow2::array::*; use arrow2::datatypes::{DataType, Field}; @@ -49,18 +51,18 @@ fn test_make_growable_extension() { .unwrap(); make_growable(&[&array], false, 2); - let data_type = DataType::Extension("ext".to_owned(), Box::new(DataType::Int32), None); + let data_type = DataType::Extension("ext".to_owned(), Arc::new(DataType::Int32), None); let array = Int32Array::from_slice([1, 2]).to(data_type.clone()); let array_grown = make_growable(&[&array], false, 2).as_box(); assert_eq!(array_grown.data_type(), &data_type); let data_type = DataType::Extension( "ext".to_owned(), - Box::new(DataType::Struct(vec![Field::new( + Arc::new(DataType::Struct(Arc::new(vec![Field::new( "a", DataType::Int32, false, - )])), + )]))), None, ); let array = StructArray::new( diff --git a/tests/it/array/growable/struct_.rs b/tests/it/array/growable/struct_.rs index 9596f23961d..9ab9ba7303f 100644 --- a/tests/it/array/growable/struct_.rs +++ b/tests/it/array/growable/struct_.rs @@ -24,7 +24,7 @@ fn some_values() -> (DataType, Vec>) { Field::new("f1", DataType::Utf8, true), Field::new("f2", DataType::Int32, true), ]; - (DataType::Struct(fields), vec![strings, ints]) + (DataType::Struct(std::sync::Arc::new(fields)), vec![strings, ints]) } #[test] diff --git a/tests/it/array/growable/union.rs b/tests/it/array/growable/union.rs index 520a64092e4..756d4458f1f 100644 --- a/tests/it/array/growable/union.rs +++ b/tests/it/array/growable/union.rs @@ -13,7 +13,7 @@ fn sparse() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -45,7 +45,7 @@ fn dense() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -83,7 +83,7 @@ fn complex_dense() -> Result<()> { Field::new("c", fixed_size_type.clone(), true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); // UnionArray[1, [11, 12, 13], abcd, [21, 22, 23], 2] let types = vec![0, 2, 1, 2, 0].into(); diff --git a/tests/it/array/map/mod.rs b/tests/it/array/map/mod.rs index 1d3ab488554..1a6bbe2ffa3 100644 --- a/tests/it/array/map/mod.rs +++ b/tests/it/array/map/mod.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::*, datatypes::{DataType, Field}, @@ -5,11 +7,14 @@ use arrow2::{ #[test] fn basics() { - let dt = DataType::Struct(vec![ + let dt = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Utf8, true), Field::new("b", DataType::Utf8, true), - ]); - let data_type = DataType::Map(std::sync::Arc::new(Field::new("a", dt.clone(), true)), false); + ])); + let data_type = DataType::Map( + std::sync::Arc::new(Field::new("a", dt.clone(), true)), + false, + ); let field = StructArray::new( dt.clone(), diff --git a/tests/it/array/mod.rs b/tests/it/array/mod.rs index 628daa47451..188f6081c50 100644 --- a/tests/it/array/mod.rs +++ b/tests/it/array/mod.rs @@ -13,6 +13,8 @@ mod struct_; mod union; mod utf8; +use std::sync::Arc; + use arrow2::array::{clone, new_empty_array, new_null_array, Array, PrimitiveArray}; use arrow2::bitmap::Bitmap; use arrow2::datatypes::{DataType, Field, UnionMode}; @@ -34,12 +36,12 @@ fn nulls() { // unions' null count is always 0 let datatypes = vec![ DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Dense, ), DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Sparse, ), @@ -60,20 +62,20 @@ fn empty() { DataType::List(std::sync::Arc::new(Field::new("a", DataType::Binary, true))), DataType::List(std::sync::Arc::new(Field::new( "a", - DataType::Extension("ext".to_owned(), Box::new(DataType::Int32), None), + DataType::Extension("ext".to_owned(), Arc::new(DataType::Int32), None), true, ))), DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Sparse, ), DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Dense, ), - DataType::Struct(vec![Field::new("a", DataType::Int32, true)]), + DataType::Struct(Arc::new(vec![Field::new("a", DataType::Int32, true)])), ]; let a = datatypes.into_iter().all(|x| new_empty_array(x).len() == 0); assert!(a); @@ -88,20 +90,20 @@ fn empty_extension() { DataType::Binary, DataType::List(std::sync::Arc::new(Field::new("a", DataType::Binary, true))), DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Sparse, ), DataType::Union( - vec![Field::new("a", DataType::Binary, true)], + Arc::new(vec![Field::new("a", DataType::Binary, true)]), None, UnionMode::Dense, ), - DataType::Struct(vec![Field::new("a", DataType::Int32, true)]), + DataType::Struct(Arc::new(vec![Field::new("a", DataType::Int32, true)])), ]; let a = datatypes .into_iter() - .map(|dt| DataType::Extension("ext".to_owned(), Box::new(dt), None)) + .map(|dt| DataType::Extension("ext".to_owned(), Arc::new(dt), None)) .all(|x| { let a = new_empty_array(x); a.len() == 0 && matches!(a.data_type(), DataType::Extension(_, _, _)) diff --git a/tests/it/array/struct_/iterator.rs b/tests/it/array/struct_/iterator.rs index be4a5eefbb4..a7190986e2e 100644 --- a/tests/it/array/struct_/iterator.rs +++ b/tests/it/array/struct_/iterator.rs @@ -13,7 +13,7 @@ fn test_simple_iter() { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.clone(), int.clone()], None, ); diff --git a/tests/it/array/struct_/mod.rs b/tests/it/array/struct_/mod.rs index cd32eee3f75..5e467b03796 100644 --- a/tests/it/array/struct_/mod.rs +++ b/tests/it/array/struct_/mod.rs @@ -16,7 +16,7 @@ fn debug() { ]; let array = StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), ); diff --git a/tests/it/array/struct_/mutable.rs b/tests/it/array/struct_/mutable.rs index 19f2f12f15e..fdc6f5f204d 100644 --- a/tests/it/array/struct_/mutable.rs +++ b/tests/it/array/struct_/mutable.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::*, datatypes::{DataType, Field}, @@ -7,7 +9,7 @@ use arrow2::{ fn push() { let c1 = Box::new(MutablePrimitiveArray::::new()) as Box; let values = vec![c1]; - let data_type = DataType::Struct(vec![Field::new("f1", DataType::Int32, true)]); + let data_type = DataType::Struct(Arc::new(vec![Field::new("f1", DataType::Int32, true)])); let mut a = MutableStructArray::new(data_type, values); a.value::>(0) diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs index 4a8c3aee214..32e3dca0b77 100644 --- a/tests/it/array/union.rs +++ b/tests/it/array/union.rs @@ -25,7 +25,7 @@ fn sparse_debug() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -45,7 +45,7 @@ fn dense_debug() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -66,7 +66,7 @@ fn slice() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -94,7 +94,7 @@ fn iter_sparse() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), @@ -127,7 +127,7 @@ fn iter_dense() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = Buffer::from(vec![0, 0, 1]); let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ @@ -161,7 +161,7 @@ fn iter_sparse_slice() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Int32Array::from(&[Some(1), Some(3), Some(2)]).boxed(), @@ -187,7 +187,7 @@ fn iter_dense_slice() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = Buffer::from(vec![0, 0, 1]); let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ @@ -214,7 +214,7 @@ fn scalar() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = Buffer::from(vec![0, 0, 1]); let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ @@ -271,7 +271,7 @@ fn dense_without_offsets_is_error() { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Dense); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Dense); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), @@ -287,7 +287,7 @@ fn fields_must_match() { Field::new("a", DataType::Int64, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = vec![0, 0, 1].into(); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), @@ -303,7 +303,7 @@ fn sparse_with_offsets_is_error() { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), @@ -321,7 +321,7 @@ fn offsets_must_be_in_bounds() { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), @@ -340,7 +340,7 @@ fn sparse_with_wrong_offsets1_is_error() { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), @@ -359,7 +359,7 @@ fn types_must_be_in_bounds() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let fields = vec![ Int32Array::from([Some(1), Some(3), Some(2)]).boxed(), Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), diff --git a/tests/it/arrow.rs b/tests/it/arrow.rs index 7f013bfe2db..387235ce409 100644 --- a/tests/it/arrow.rs +++ b/tests/it/arrow.rs @@ -142,11 +142,11 @@ fn make_struct() -> StructArray { let nulls = [true, true, false].into_iter().collect(); StructArray::new( - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("a1", a1.data_type().clone(), true), Field::new("a2", a2.data_type().clone(), true), Field::new("a3", a3.data_type().clone(), true), - ]), + ])), vec![Box::new(a1), Box::new(a2), Box::new(a3)], Some(nulls), ) @@ -235,7 +235,7 @@ fn test_dictionary() { let dictionary = DictionaryArray::try_new( DataType::Dictionary( IntegerType::Int16, - Box::new(values.data_type().clone()), + std::sync::Arc::new(values.data_type().clone()), false, ), keys, @@ -287,10 +287,10 @@ fn test_map() { ); let values = PrimitiveArray::::from_iter([Some(1), None, Some(3), Some(1), None]); let fields = StructArray::new( - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("keys", DataType::Utf8, false), // Cannot be nullable Field::new("values", DataType::Int32, true), - ]), + ])), vec![Box::new(keys), Box::new(values)], None, // Cannot be nullable ); @@ -316,10 +316,10 @@ fn test_map() { #[test] fn test_dense_union() { - let fields = vec![ + let fields = Arc::new(vec![ Field::new("a1", DataType::Int32, true), Field::new("a2", DataType::Int64, true), - ]; + ]); let a1 = PrimitiveArray::from_iter([Some(2), None]); let a2 = PrimitiveArray::from_iter([Some(2_i64), None, Some(3)]); @@ -327,7 +327,7 @@ fn test_dense_union() { let types = vec![1, 0, 0, 1, 1]; let offsets = vec![0, 0, 1, 1, 2]; let union = UnionArray::new( - DataType::Union(fields.clone(), Some(vec![0, 1]), UnionMode::Dense), + DataType::Union(fields.clone(), Some(Arc::new(vec![0, 1])), UnionMode::Dense), types.into(), vec![Box::new(a1.clone()), Box::new(a2.clone())], Some(offsets.into()), @@ -338,7 +338,7 @@ fn test_dense_union() { let types = vec![1, 4, 4, 1, 1]; let offsets = vec![0, 0, 1, 1, 2]; let union = UnionArray::new( - DataType::Union(fields, Some(vec![4, 1]), UnionMode::Dense), + DataType::Union(fields, Some(Arc::new(vec![4, 1])), UnionMode::Dense), types.into(), vec![Box::new(a1), Box::new(a2)], Some(offsets.into()), @@ -349,17 +349,21 @@ fn test_dense_union() { #[test] fn test_sparse_union() { - let fields = vec![ + let fields = Arc::new(vec![ Field::new("a1", DataType::Int32, true), Field::new("a2", DataType::Int64, true), - ]; + ]); let a1 = PrimitiveArray::from_iter([None, Some(2), None, None, None]); let a2 = PrimitiveArray::from_iter([Some(2_i64), None, None, None, Some(3)]); let types = vec![1, 0, 0, 1, 1]; let union = UnionArray::new( - DataType::Union(fields.clone(), Some(vec![0, 1]), UnionMode::Sparse), + DataType::Union( + fields.clone(), + Some(Arc::new(vec![0, 1])), + UnionMode::Sparse, + ), types.into(), vec![Box::new(a1.clone()), Box::new(a2.clone())], None, @@ -369,7 +373,7 @@ fn test_sparse_union() { let types = vec![1, 4, 4, 1, 1]; let union = UnionArray::new( - DataType::Union(fields, Some(vec![4, 1]), UnionMode::Sparse), + DataType::Union(fields, Some(Arc::new(vec![4, 1])), UnionMode::Sparse), types.into(), vec![Box::new(a1), Box::new(a2)], None, diff --git a/tests/it/compute/cast.rs b/tests/it/compute/cast.rs index 22ec3fd040e..131f834f968 100644 --- a/tests/it/compute/cast.rs +++ b/tests/it/compute/cast.rs @@ -689,7 +689,7 @@ fn utf8_to_dict() { let array = Utf8Array::::from([Some("one"), None, Some("three"), Some("one")]); // Cast to a dictionary (same value type, Utf8) - let cast_type = DataType::Dictionary(u8::KEY_TYPE, Box::new(DataType::Utf8), false); + let cast_type = DataType::Dictionary(u8::KEY_TYPE, Arc::new(DataType::Utf8), false); let result = cast(&array, &cast_type, CastOptions::default()).expect("cast failed"); let mut expected = MutableDictionaryArray::>::new(); @@ -720,7 +720,7 @@ fn i32_to_dict() { let array = Int32Array::from(&[Some(1), None, Some(3), Some(1)]); // Cast to a dictionary (same value type, Utf8) - let cast_type = DataType::Dictionary(u8::KEY_TYPE, Box::new(DataType::Int32), false); + let cast_type = DataType::Dictionary(u8::KEY_TYPE, Arc::new(DataType::Int32), false); let result = cast(&array, &cast_type, CastOptions::default()).expect("cast failed"); let mut expected = MutableDictionaryArray::>::new(); @@ -902,7 +902,7 @@ fn dict_keys() { let result = cast( &array, - &DataType::Dictionary(IntegerType::Int64, Box::new(DataType::Utf8), false), + &DataType::Dictionary(IntegerType::Int64, Arc::new(DataType::Utf8), false), CastOptions::default(), ) .expect("cast failed"); diff --git a/tests/it/compute/comparison.rs b/tests/it/compute/comparison.rs index a63bb39ce01..9e60fb071a4 100644 --- a/tests/it/compute/comparison.rs +++ b/tests/it/compute/comparison.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::array::*; use arrow2::bitmap::Bitmap; use arrow2::compute::comparison::{self, boolean::*, primitive, utf8}; @@ -42,7 +44,7 @@ fn consistency() { Duration(TimeUnit::Millisecond), Duration(TimeUnit::Microsecond), Duration(TimeUnit::Nanosecond), - Dictionary(IntegerType::Int32, Box::new(LargeBinary), false), + Dictionary(IntegerType::Int32, Arc::new(LargeBinary), false), ]; // array <> array diff --git a/tests/it/compute/sort/row/mod.rs b/tests/it/compute/sort/row/mod.rs index 4931689a192..4ec7617b265 100644 --- a/tests/it/compute/sort/row/mod.rs +++ b/tests/it/compute/sort/row/mod.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::{ Array, BinaryArray, BooleanArray, DictionaryArray, Float32Array, Int128Array, Int16Array, @@ -265,7 +267,7 @@ fn test_dictionary_nulls() { let values = Int32Array::from_iter([Some(1), Some(-1), None, Some(4), None]); let keys = Int32Array::from_iter([Some(0), Some(0), Some(1), Some(2), Some(4), None]); - let data_type = DataType::Dictionary(IntegerType::Int32, Box::new(DataType::Int32), false); + let data_type = DataType::Dictionary(IntegerType::Int32, Arc::new(DataType::Int32), false); let data = DictionaryArray::try_from_keys(keys, values.to_boxed()).unwrap(); let mut converter = RowConverter::new(vec![SortField::new(data_type)]); diff --git a/tests/it/compute/take.rs b/tests/it/compute/take.rs index feaa0d82081..54899f92f84 100644 --- a/tests/it/compute/take.rs +++ b/tests/it/compute/take.rs @@ -72,7 +72,7 @@ fn create_test_struct() -> StructArray { Field::new("b", DataType::Int32, true), ]; StructArray::new( - DataType::Struct(fields), + DataType::Struct(std::sync::Arc::new(fields)), vec![boolean.boxed(), int.boxed()], validity, ) diff --git a/tests/it/ffi/data.rs b/tests/it/ffi/data.rs index afb263e8531..b0a02561f12 100644 --- a/tests/it/ffi/data.rs +++ b/tests/it/ffi/data.rs @@ -291,7 +291,7 @@ fn list_list() -> Result<()> { #[test] fn struct_() -> Result<()> { - let data_type = DataType::Struct(vec![Field::new("a", DataType::Int32, true)]); + let data_type = DataType::Struct(Arc::new(vec![Field::new("a", DataType::Int32, true)])); let values = vec![Int32Array::from([Some(1), None, Some(3)]).boxed()]; let validity = Bitmap::from([true, false, true]); @@ -323,7 +323,7 @@ fn schema() -> Result<()> { let field = Field::new( "a", - DataType::Dictionary(u32::KEY_TYPE, Box::new(DataType::Utf8), false), + DataType::Dictionary(u32::KEY_TYPE, Arc::new(DataType::Utf8), false), true, ); test_round_trip_schema(field)?; @@ -341,8 +341,8 @@ fn extension() -> Result<()> { "a", DataType::Extension( "a".to_string(), - Box::new(DataType::Int32), - Some("bla".to_string()), + Arc::new(DataType::Int32), + Some("bla".to_string()).map(Arc::new), ), true, ); @@ -355,11 +355,11 @@ fn extension_children() -> Result<()> { "a", DataType::Extension( "b".to_string(), - Box::new(DataType::Struct(vec![Field::new( + Arc::new(DataType::Struct(Arc::new(vec![Field::new( "c", DataType::Int32, true, - )])), + )]))), None, ), true, diff --git a/tests/it/io/avro/read.rs b/tests/it/io/avro/read.rs index 88125087e09..b81c2a72e88 100644 --- a/tests/it/io/avro/read.rs +++ b/tests/it/io/avro/read.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::chunk::Chunk; use avro_rs::types::{Record, Value}; use avro_rs::{Codec, Writer}; @@ -73,23 +75,27 @@ pub(super) fn schema() -> (AvroSchema, Schema) { Field::new("g", DataType::Utf8, true), Field::new( "h", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + true, + ))), false, ), Field::new( "i", - DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), + DataType::Struct(Arc::new(vec![Field::new("e", DataType::Float64, false)])), false, ), Field::new( "enum", - DataType::Dictionary(i32::KEY_TYPE, Box::new(DataType::Utf8), false), + DataType::Dictionary(i32::KEY_TYPE, Arc::new(DataType::Utf8), false), false, ), Field::new("decimal", DataType::Decimal(18, 5), false), Field::new( "nullable_struct", - DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), + DataType::Struct(Arc::new(vec![Field::new("e", DataType::Float64, false)])), true, ), ]); @@ -117,7 +123,7 @@ pub(super) fn data() -> Chunk> { Utf8Array::::from([Some("foo"), None]).boxed(), array.into_box(), StructArray::new( - DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), + DataType::Struct(Arc::new(vec![Field::new("e", DataType::Float64, false)])), vec![PrimitiveArray::::from_slice([1.0, 2.0]).boxed()], None, ) @@ -132,7 +138,7 @@ pub(super) fn data() -> Chunk> { .to(DataType::Decimal(18, 5)) .boxed(), StructArray::new( - DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), + DataType::Struct(Arc::new(vec![Field::new("e", DataType::Float64, false)])), vec![PrimitiveArray::::from_slice([1.0, 0.0]).boxed()], Some([true, false].into()), ) @@ -331,7 +337,11 @@ fn schema_list() -> (AvroSchema, Schema) { let schema = Schema::from(vec![Field::new( "h", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + false, + ))), false, )]); @@ -343,7 +353,11 @@ pub(super) fn data_list() -> Chunk> { let mut array = MutableListArray::>::new_from( Default::default(), - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + false, + ))), 0, ); array.try_extend(data).unwrap(); diff --git a/tests/it/io/avro/write.rs b/tests/it/io/avro/write.rs index 5e995e7a095..7fe4bcd57b0 100644 --- a/tests/it/io/avro/write.rs +++ b/tests/it/io/avro/write.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::array::*; use arrow2::chunk::Chunk; use arrow2::datatypes::*; @@ -42,20 +44,36 @@ pub(super) fn schema() -> Schema { ), Field::new( "list", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + true, + ))), false, ), Field::new( "list nullable", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + true, + ))), true, ), ]) } pub(super) fn data() -> Chunk> { - let list_dt = DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, true))); - let list_dt1 = DataType::List(std::sync::Arc::new(Field::new("item", DataType::Int32, true))); + let list_dt = DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + true, + ))); + let list_dt1 = DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Int32, + true, + ))); let columns = vec![ Box::new(Int64Array::from_slice([27, 47])) as Box, @@ -242,28 +260,28 @@ fn struct_schema() -> Schema { Schema::from(vec![ Field::new( "struct", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("item1", DataType::Int32, false), Field::new("item2", DataType::Int32, true), - ]), + ])), false, ), Field::new( "struct nullable", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("item1", DataType::Int32, false), Field::new("item2", DataType::Int32, true), - ]), + ])), true, ), ]) } fn struct_data() -> Chunk> { - let struct_dt = DataType::Struct(vec![ + let struct_dt = DataType::Struct(Arc::new(vec![ Field::new("item1", DataType::Int32, false), Field::new("item2", DataType::Int32, true), - ]); + ])); Chunk::new(vec![ Box::new(StructArray::new( diff --git a/tests/it/io/ipc/mmap.rs b/tests/it/io/ipc/mmap.rs index 11c89ae02fd..7e9533c1c7d 100644 --- a/tests/it/io/ipc/mmap.rs +++ b/tests/it/io/ipc/mmap.rs @@ -98,7 +98,11 @@ fn struct_() -> Result<()> { let array = PrimitiveArray::::from([None, None, None, Some(3), Some(4)]).boxed(); let array = StructArray::new( - DataType::Struct(vec![Field::new("f1", array.data_type().clone(), true)]), + DataType::Struct(Arc::new(vec![Field::new( + "f1", + array.data_type().clone(), + true, + )])), vec![array], Some([true, true, false, true, false].into()), ) diff --git a/tests/it/io/json/read.rs b/tests/it/io/json/read.rs index 37063165446..ba4665ef040 100644 --- a/tests/it/io/json/read.rs +++ b/tests/it/io/json/read.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::array::*; use arrow2::datatypes::*; use arrow2::error::Result; @@ -24,7 +26,7 @@ fn read_json() -> Result<()> { let result = read::deserialize(&json, data_type)?; let expected = StructArray::new( - DataType::Struct(vec![Field::new("a", DataType::Int64, true)]), + DataType::Struct(Arc::new(vec![Field::new("a", DataType::Int64, true)])), vec![Box::new(Int64Array::from_slice([1, 2, 3])) as _], None, ); @@ -260,7 +262,10 @@ fn deserialize_timestamp_string_tz_s() -> Result<()> { let data_type = DataType::List(std::sync::Arc::new(Field::new( "item", - DataType::Timestamp(TimeUnit::Second, Some(std::sync::Arc::new("+01:00".to_string()))), + DataType::Timestamp( + TimeUnit::Second, + Some(std::sync::Arc::new("+01:00".to_string())), + ), false, ))); diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index ba07cf33298..adb51d10b58 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::datatypes::IntegerType; use arrow2::{ array::*, @@ -74,7 +76,7 @@ fn dictionary_utf8() -> Result<()> { let values = Utf8Array::::from([Some("a"), Some("b"), Some("c"), Some("d")]); let keys = PrimitiveArray::from_slice([0u32, 1, 2, 3, 1]); let array = DictionaryArray::try_new( - DataType::Dictionary(IntegerType::UInt32, Box::new(DataType::LargeUtf8), false), + DataType::Dictionary(IntegerType::UInt32, Arc::new(DataType::LargeUtf8), false), keys, Box::new(values), ) @@ -90,10 +92,10 @@ fn struct_() -> Result<()> { let c1 = Int32Array::from([Some(1), Some(2), Some(3), None, Some(5)]); let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c"), Some("d"), None]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); let expected = r#"[{"c1":1,"c2":"a"},{"c1":2,"c2":"b"},{"c1":3,"c2":"c"},{"c1":null,"c2":"d"},{"c1":5,"c2":null}]"#; @@ -103,14 +105,14 @@ fn struct_() -> Result<()> { #[test] fn nested_struct_with_validity() -> Result<()> { - let inner = vec![ + let inner = Arc::new(vec![ Field::new("c121", DataType::Utf8, false), Field::new("c122", DataType::Int32, false), - ]; - let fields = vec![ + ]); + let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), Field::new("c12", DataType::Struct(inner.clone()), false), - ]; + ]); let c1 = StructArray::new( DataType::Struct(fields), @@ -130,10 +132,10 @@ fn nested_struct_with_validity() -> Result<()> { ); let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c")]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":null},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f","c122":null}},"c2":"b"},{"c1":null,"c2":"c"}]"#; @@ -144,17 +146,17 @@ fn nested_struct_with_validity() -> Result<()> { #[test] fn nested_struct() -> Result<()> { let c121 = Field::new("c121", DataType::Utf8, false); - let fields = vec![ + let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), - Field::new("c12", DataType::Struct(vec![c121.clone()]), false), - ]; + Field::new("c12", DataType::Struct(Arc::new(vec![c121.clone()])), false), + ]); let c1 = StructArray::new( DataType::Struct(fields), vec![ Int32Array::from(&[Some(1), None, Some(5)]).boxed(), StructArray::new( - DataType::Struct(vec![c121]), + DataType::Struct(Arc::new(vec![c121])), vec![Box::new(Utf8Array::::from([ Some("e"), Some("f"), @@ -169,10 +171,10 @@ fn nested_struct() -> Result<()> { let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c")]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f"}},"c2":"b"},{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}]"#; @@ -198,10 +200,10 @@ fn struct_with_list_field() -> Result<()> { let c2 = PrimitiveArray::from_slice([1, 2, 3, 4, 5]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":["a","a1"],"c2":1},{"c1":["b"],"c2":2},{"c1":["c"],"c2":3},{"c1":["d"],"c2":4},{"c1":["e"],"c2":5}]"#; @@ -233,10 +235,10 @@ fn nested_list() -> Result<()> { let c2 = Utf8Array::::from([Some("foo"), Some("bar"), None]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = @@ -321,11 +323,11 @@ fn fixed_size_list_records() -> Result<()> { #[test] fn list_of_struct() -> Result<()> { - let inner = vec![Field::new("c121", DataType::Utf8, false)]; - let fields = vec![ + let inner = Arc::new(vec![Field::new("c121", DataType::Utf8, false)]); + let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), Field::new("c12", DataType::Struct(inner.clone()), false), - ]; + ]); let c1_datatype = DataType::List(std::sync::Arc::new(Field::new( "s", DataType::Struct(fields.clone()), @@ -363,10 +365,10 @@ fn list_of_struct() -> Result<()> { let c2 = Int32Array::from_slice([1, 2, 3]); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), - ]); + ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":[{"c11":1,"c12":null},{"c11":null,"c12":{"c121":"f"}}],"c2":1},{"c1":null,"c2":2},{"c1":[null],"c2":3}]"#; diff --git a/tests/it/io/ndjson/mod.rs b/tests/it/io/ndjson/mod.rs index f11e15b1ed0..124fa1a6552 100644 --- a/tests/it/io/ndjson/mod.rs +++ b/tests/it/io/ndjson/mod.rs @@ -1,5 +1,7 @@ mod read; +use std::sync::Arc; + use arrow2::array::*; use arrow2::bitmap::Bitmap; use arrow2::datatypes::*; @@ -46,20 +48,28 @@ fn case_list() -> (String, Box) { "# .to_string(); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new( "b", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Float64, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Float64, + true, + ))), true, ), Field::new( "c", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Boolean, + true, + ))), true, ), Field::new("d", DataType::Utf8, true), - ]); + ])); let a = Int64Array::from(&[Some(1), Some(-10), None]); let mut b = MutableListArray::>::new(); @@ -102,7 +112,7 @@ fn case_dict() -> (String, Box) { let data_type = DataType::List(std::sync::Arc::new(Field::new( "item", - DataType::Dictionary(u64::KEY_TYPE, Box::new(DataType::Utf8), false), + DataType::Dictionary(u64::KEY_TYPE, Arc::new(DataType::Utf8), false), true, ))); @@ -130,7 +140,12 @@ fn case_dict() -> (String, Box) { ( data, - StructArray::new(DataType::Struct(fields), vec![array.boxed()], None).boxed(), + StructArray::new( + DataType::Struct(std::sync::Arc::new(fields)), + vec![array.boxed()], + None, + ) + .boxed(), ) } @@ -139,12 +154,12 @@ fn case_basics() -> (String, Box) { {"a":-10, "b":-3.5, "c":true, "d":null} {"a":100000000, "b":0.6, "d":"text"}"# .to_string(); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new("b", DataType::Float64, true), Field::new("c", DataType::Boolean, true), Field::new("d", DataType::Utf8, true), - ]); + ])); let array = StructArray::new( data_type, vec![ @@ -163,13 +178,13 @@ fn case_projection() -> (String, Box) { {"a":10, "b":-3.5, "c":true, "d":null, "e":"text"} {"a":100000000, "b":0.6, "d":"text"}"# .to_string(); - let data_type = DataType::Struct(vec![ + let data_type = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::UInt32, true), Field::new("b", DataType::Float32, true), Field::new("c", DataType::Boolean, true), // note how "d" is not here Field::new("e", DataType::Binary, true), - ]); + ])); let array = StructArray::new( data_type, vec![ @@ -191,27 +206,30 @@ fn case_struct() -> (String, Box) { .to_string(); let d_field = Field::new("d", DataType::Utf8, true); - let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true); + let c_field = Field::new("c", DataType::Struct(Arc::new(vec![d_field.clone()])), true); let a_field = Field::new( "a", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("b", DataType::Boolean, true), c_field.clone(), - ]), + ])), true, ); - let fields = vec![a_field]; + let fields = Arc::new(vec![a_field]); // build expected output let d = Utf8Array::::from([Some("text"), None, Some("text"), None]); let c = StructArray::new( - DataType::Struct(vec![d_field]), + DataType::Struct(Arc::new(vec![d_field])), vec![d.boxed()], Some([true, false, true, true].into()), ); let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]); - let inner = DataType::Struct(vec![Field::new("b", DataType::Boolean, true), c_field]); + let inner = DataType::Struct(Arc::new(vec![ + Field::new("b", DataType::Boolean, true), + c_field, + ])); let expected = StructArray::new( inner, vec![b.boxed(), c.boxed()], @@ -228,11 +246,11 @@ fn case_struct() -> (String, Box) { fn case_nested_list() -> (String, Box) { let d_field = Field::new("d", DataType::Utf8, true); - let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true); + let c_field = Field::new("c", DataType::Struct(Arc::new(vec![d_field.clone()])), true); let b_field = Field::new("b", DataType::Boolean, true); let a_struct_field = Field::new( "a", - DataType::Struct(vec![b_field.clone(), c_field.clone()]), + DataType::Struct(Arc::new(vec![b_field.clone(), c_field.clone()])), true, ); let a_list_data_type = DataType::List(std::sync::Arc::new(a_struct_field)); @@ -257,7 +275,7 @@ fn case_nested_list() -> (String, Box) { ]); let c = StructArray::new( - DataType::Struct(vec![d_field]), + DataType::Struct(Arc::new(vec![d_field])), vec![d.boxed()], Some(Bitmap::from_u8_slice([0b11111011], 6)), ); @@ -271,7 +289,7 @@ fn case_nested_list() -> (String, Box) { Some(true), ]); let a_struct = StructArray::new( - DataType::Struct(vec![b_field, c_field]), + DataType::Struct(Arc::new(vec![b_field, c_field])), vec![b.boxed(), c.boxed()], None, ); @@ -283,7 +301,7 @@ fn case_nested_list() -> (String, Box) { ); let array = StructArray::new( - DataType::Struct(vec![a_field]), + DataType::Struct(Arc::new(vec![a_field])), vec![expected.boxed()], None, ) @@ -316,7 +334,7 @@ fn infer_object() -> Result<()> { let utf8_fld = Field::new("utf8", DataType::Utf8, true); let bools_fld = Field::new("bools", DataType::Boolean, true); - let expected = DataType::Struct(vec![u64_fld, f64_fld, utf8_fld, bools_fld]); + let expected = DataType::Struct(Arc::new(vec![u64_fld, f64_fld, utf8_fld, bools_fld])); let actual = infer(data)?; assert_eq!(expected, actual); diff --git a/tests/it/io/ndjson/read.rs b/tests/it/io/ndjson/read.rs index 82553ef36d2..9d85f1f51e4 100644 --- a/tests/it/io/ndjson/read.rs +++ b/tests/it/io/ndjson/read.rs @@ -1,4 +1,5 @@ use std::io::Cursor; +use std::sync::Arc; use arrow2::array::*; use arrow2::datatypes::{DataType, Field}; @@ -89,13 +90,13 @@ fn case_nested_struct() -> (String, Box) { {"a": {"a": 2.0, "b": 2}} "#; - let inner = DataType::Struct(vec![ + let inner = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Float64, true), Field::new("b", DataType::Int64, true), Field::new("c", DataType::Boolean, true), - ]); + ])); - let data_type = DataType::Struct(vec![Field::new("a", inner.clone(), true)]); + let data_type = DataType::Struct(Arc::new(vec![Field::new("a", inner.clone(), true)])); let values = vec![ Float64Array::from([Some(2.0), None, Some(2.0), Some(2.0)]).boxed(), @@ -168,20 +169,28 @@ fn infer_schema_mixed_list() -> Result<()> { {"a":3, "b":4, "c": true, "d":[1, false, "array", 2.4]} "#; - let expected = DataType::Struct(vec![ + let expected = DataType::Struct(Arc::new(vec![ Field::new("a", DataType::Int64, true), Field::new( "b", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Float64, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Float64, + true, + ))), true, ), Field::new( "c", - DataType::List(std::sync::Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Boolean, + true, + ))), true, ), Field::new("d", DataType::Utf8, true), - ]); + ])); let result = infer(ndjson)?; @@ -240,10 +249,10 @@ fn line_break_in_values() -> Result<()> { fn invalid_read_record() -> Result<()> { let fields = vec![Field::new( "a", - DataType::Struct(vec![Field::new("a", DataType::Utf8, true)]), + DataType::Struct(Arc::new(vec![Field::new("a", DataType::Utf8, true)])), true, )]; - let data_type = DataType::Struct(fields); + let data_type = DataType::Struct(std::sync::Arc::new(fields)); let arrays = read_and_deserialize("city,lat,lng", &data_type, 1000); assert_eq!( @@ -262,7 +271,7 @@ fn skip_empty_lines() -> Result<()> { {\"a\": 3}"; - let data_type = DataType::Struct(vec![Field::new("a", DataType::Int64, true)]); + let data_type = DataType::Struct(Arc::new(vec![Field::new("a", DataType::Int64, true)])); let arrays = read_and_deserialize(ndjson, &data_type, 1000)?; assert_eq!(1, arrays.len()); diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index d3f651d0a90..6fa07bc3989 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -34,7 +34,11 @@ fn new_struct( .zip(arrays.iter()) .map(|(n, a)| Field::new(n, a.data_type().clone(), true)) .collect(); - StructArray::new(DataType::Struct(fields), arrays, validity) + StructArray::new( + DataType::Struct(std::sync::Arc::new(fields)), + arrays, + validity, + ) } pub fn read_column(mut reader: R, column: &str) -> Result { @@ -108,7 +112,11 @@ pub fn pyarrow_nested_edge(column: &str) -> Box { None, ); StructArray::new( - DataType::Struct(vec![Field::new("f1", a.data_type().clone(), true)]), + DataType::Struct(Arc::new(vec![Field::new( + "f1", + a.data_type().clone(), + true, + )])), vec![a.boxed()], None, ) @@ -586,7 +594,10 @@ pub fn pyarrow_nullable(column: &str) -> Box { PrimitiveArray::::from(i64_values).to(DataType::Timestamp(TimeUnit::Second, None)), ), "timestamp_s_utc" => Box::new(PrimitiveArray::::from(i64_values).to( - DataType::Timestamp(TimeUnit::Second, Some(std::sync::Arc::new("UTC".to_string()))), + DataType::Timestamp( + TimeUnit::Second, + Some(std::sync::Arc::new("UTC".to_string())), + ), )), _ => unreachable!(), } @@ -1072,7 +1083,7 @@ pub fn pyarrow_nested_edge_statistics(column: &str) -> Statistics { .zip(arrays.iter()) .map(|(n, a)| Field::new(n, a.data_type().clone(), true)) .collect(); - StructArray::new(DataType::Struct(fields), arrays, None) + StructArray::new(DataType::Struct(std::sync::Arc::new(fields)), arrays, None) }; let names = vec!["f1".to_string()]; @@ -1178,18 +1189,28 @@ pub fn pyarrow_struct(column: &str) -> Box { Field::new("f2", DataType::Boolean, true), ]; match column { - "struct" => StructArray::new(DataType::Struct(fields), vec![string, boolean], None).boxed(), + "struct" => StructArray::new( + DataType::Struct(std::sync::Arc::new(fields)), + vec![string, boolean], + None, + ) + .boxed(), "struct_nullable" => { let values = vec![string, boolean]; - StructArray::new(DataType::Struct(fields), values, Some(mask.into())).boxed() + StructArray::new( + DataType::Struct(std::sync::Arc::new(fields)), + values, + Some(mask.into()), + ) + .boxed() } "struct_struct" => { let struct_ = pyarrow_struct("struct"); Box::new(StructArray::new( - DataType::Struct(vec![ - Field::new("f1", DataType::Struct(fields), true), + DataType::Struct(Arc::new(vec![ + Field::new("f1", DataType::Struct(std::sync::Arc::new(fields)), true), Field::new("f2", DataType::Boolean, true), - ]), + ])), vec![struct_, boolean], None, )) @@ -1197,10 +1218,10 @@ pub fn pyarrow_struct(column: &str) -> Box { "struct_struct_nullable" => { let struct_ = pyarrow_struct("struct"); Box::new(StructArray::new( - DataType::Struct(vec![ - Field::new("f1", DataType::Struct(fields), true), + DataType::Struct(Arc::new(vec![ + Field::new("f1", DataType::Struct(std::sync::Arc::new(fields)), true), Field::new("f2", DataType::Boolean, true), - ]), + ])), vec![struct_, boolean], Some(mask.into()), )) @@ -1381,10 +1402,10 @@ pub fn pyarrow_map(column: &str) -> Box { "map" => { let s1 = [Some("a1"), Some("a2")]; let s2 = [Some("b1"), Some("b2")]; - let dt = DataType::Struct(vec![ + let dt = DataType::Struct(Arc::new(vec![ Field::new("key", DataType::Utf8, false), Field::new("value", DataType::Utf8, true), - ]); + ])); MapArray::try_new( DataType::Map( std::sync::Arc::new(Field::new("entries", dt.clone(), false)), @@ -1409,10 +1430,10 @@ pub fn pyarrow_map(column: &str) -> Box { "map_nullable" => { let s1 = [Some("a1"), Some("a2")]; let s2 = [Some("b1"), None]; - let dt = DataType::Struct(vec![ + let dt = DataType::Struct(Arc::new(vec![ Field::new("key", DataType::Utf8, false), Field::new("value", DataType::Utf8, true), - ]); + ])); MapArray::try_new( DataType::Map( std::sync::Arc::new(Field::new("entries", dt.clone(), false)), @@ -1440,11 +1461,13 @@ pub fn pyarrow_map(column: &str) -> Box { pub fn pyarrow_map_statistics(column: &str) -> Statistics { let new_map = |arrays: Vec>, fields: Vec| { - let fields = fields - .into_iter() - .zip(arrays.iter()) - .map(|(f, a)| Field::new(f.name, a.data_type().clone(), f.is_nullable)) - .collect::>(); + let fields = Arc::new( + fields + .into_iter() + .zip(arrays.iter()) + .map(|(f, a)| Field::new(f.name, a.data_type().clone(), f.is_nullable)) + .collect::>(), + ); MapArray::new( DataType::Map( Arc::new(Field::new( diff --git a/tests/it/io/print.rs b/tests/it/io/print.rs index eca079a0cfd..3f23ea4c1ab 100644 --- a/tests/it/io/print.rs +++ b/tests/it/io/print.rs @@ -327,7 +327,7 @@ fn write_struct() -> Result<()> { let validity = Some(Bitmap::from(&[true, false, true])); - let array = StructArray::new(DataType::Struct(fields), values, validity); + let array = StructArray::new(DataType::Struct(std::sync::Arc::new(fields)), values, validity); let columns = Chunk::new(vec![&array as &dyn Array]); @@ -356,7 +356,7 @@ fn write_union() -> Result<()> { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Utf8, true), ]; - let data_type = DataType::Union(fields, None, UnionMode::Sparse); + let data_type = DataType::Union(std::sync::Arc::new(fields), None, UnionMode::Sparse); let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Int32Array::from(&[Some(1), None, Some(2)]).boxed(), diff --git a/tests/it/scalar/map.rs b/tests/it/scalar/map.rs index 1fb29eeb628..b8fc5cd9601 100644 --- a/tests/it/scalar/map.rs +++ b/tests/it/scalar/map.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::{BooleanArray, StructArray, Utf8Array}, datatypes::{DataType, Field}, @@ -7,10 +9,10 @@ use arrow2::{ #[allow(clippy::eq_op)] #[test] fn equal() { - let kv_dt = DataType::Struct(vec![ + let kv_dt = DataType::Struct(Arc::new(vec![ Field::new("key", DataType::Utf8, false), Field::new("value", DataType::Boolean, true), - ]); + ])); let kv_array1 = StructArray::try_new( kv_dt.clone(), vec![ @@ -30,7 +32,10 @@ fn equal() { ) .unwrap(); - let dt = DataType::Map(std::sync::Arc::new(Field::new("entries", kv_dt, true)), false); + let dt = DataType::Map( + std::sync::Arc::new(Field::new("entries", kv_dt, true)), + false, + ); let a = MapScalar::new(dt.clone(), Some(Box::new(kv_array1))); let b = MapScalar::new(dt.clone(), None); assert_eq!(a, a); @@ -43,10 +48,10 @@ fn equal() { #[test] fn basics() { - let kv_dt = DataType::Struct(vec![ + let kv_dt = DataType::Struct(Arc::new(vec![ Field::new("key", DataType::Utf8, false), Field::new("value", DataType::Boolean, true), - ]); + ])); let kv_array = StructArray::try_new( kv_dt.clone(), vec![ @@ -57,7 +62,10 @@ fn basics() { ) .unwrap(); - let dt = DataType::Map(std::sync::Arc::new(Field::new("entries", kv_dt, true)), false); + let dt = DataType::Map( + std::sync::Arc::new(Field::new("entries", kv_dt, true)), + false, + ); let a = MapScalar::new(dt.clone(), Some(Box::new(kv_array.clone()))); assert_eq!(kv_array, a.values().as_ref()); diff --git a/tests/it/scalar/struct_.rs b/tests/it/scalar/struct_.rs index 2785ecb7b41..46839d3bc45 100644 --- a/tests/it/scalar/struct_.rs +++ b/tests/it/scalar/struct_.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ datatypes::{DataType, Field}, scalar::{BooleanScalar, Scalar, StructScalar}, @@ -6,7 +8,7 @@ use arrow2::{ #[allow(clippy::eq_op)] #[test] fn equal() { - let dt = DataType::Struct(vec![Field::new("a", DataType::Boolean, true)]); + let dt = DataType::Struct(Arc::new(vec![Field::new("a", DataType::Boolean, true)])); let a = StructScalar::new( dt.clone(), Some(vec![ @@ -29,7 +31,7 @@ fn equal() { #[test] fn basics() { - let dt = DataType::Struct(vec![Field::new("a", DataType::Boolean, true)]); + let dt = DataType::Struct(Arc::new(vec![Field::new("a", DataType::Boolean, true)])); let values = vec![Box::new(BooleanScalar::from(Some(true))) as Box];