diff --git a/src/array/primitive/fmt.rs b/src/array/primitive/fmt.rs index 5a94b399424..a5c028850bd 100644 --- a/src/array/primitive/fmt.rs +++ b/src/array/primitive/fmt.rs @@ -4,10 +4,10 @@ use crate::array::Array; use crate::datatypes::{IntervalUnit, TimeUnit}; use crate::types::{days_ms, months_days_ns}; -use super::super::super::temporal_conversions; -use super::super::super::types::NativeType; -use super::super::fmt::write_vec; use super::PrimitiveArray; +use crate::array::fmt::write_vec; +use crate::temporal_conversions; +use crate::types::NativeType; macro_rules! dyn_primitive { ($array:expr, $ty:ty, $expr:expr) => {{ diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index a65d5568b7a..329e9c9f707 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -288,74 +288,102 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result { DataType::Struct(children) } other => { - let parts = other.split(':').collect::>(); - if parts.len() == 2 && parts[0] == "tss" { - DataType::Timestamp(TimeUnit::Second, Some(parts[1].to_string())) - } else if parts.len() == 2 && parts[0] == "tsm" { - DataType::Timestamp(TimeUnit::Millisecond, Some(parts[1].to_string())) - } else if parts.len() == 2 && parts[0] == "tsu" { - DataType::Timestamp(TimeUnit::Microsecond, Some(parts[1].to_string())) - } else if parts.len() == 2 && parts[0] == "tsn" { - DataType::Timestamp(TimeUnit::Nanosecond, Some(parts[1].to_string())) - } else if parts.len() == 2 && parts[0] == "w" { - let size = parts[1].parse::().map_err(|_| { - ArrowError::OutOfSpec("size is not a valid integer".to_string()) - })?; - DataType::FixedSizeBinary(size) - } else if parts.len() == 2 && parts[0] == "+w" { - let size = parts[1].parse::().map_err(|_| { - ArrowError::OutOfSpec("size is not a valid integer".to_string()) - })?; - let child = to_field(schema.child(0))?; - DataType::FixedSizeList(Box::new(child), size) - } else if parts.len() == 2 && parts[0] == "d" { - let parts = parts[1].split(',').collect::>(); - if parts.len() < 2 || parts.len() > 3 { - return Err(ArrowError::OutOfSpec( - "Decimal must contain 2 or 3 comma-separated values".to_string(), - )); - }; - if parts.len() == 3 { - let bit_width = parts[0].parse::().map_err(|_| { - ArrowError::OutOfSpec( - "Decimal bit width is not a valid integer".to_string(), - ) + match other.split(':').collect::>()[..] { + // Timestamps with no timezone + ["tss", ""] => DataType::Timestamp(TimeUnit::Second, None), + ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None), + ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None), + ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None), + + // Timestamps with timezone + ["tss", tz] => DataType::Timestamp(TimeUnit::Second, Some(tz.to_string())), + ["tsm", tz] => DataType::Timestamp(TimeUnit::Millisecond, Some(tz.to_string())), + ["tsu", tz] => DataType::Timestamp(TimeUnit::Microsecond, Some(tz.to_string())), + ["tsn", tz] => DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.to_string())), + + ["w", size_raw] => { + // Example: "w:42" fixed-width binary [42 bytes] + let size = size_raw.parse::().map_err(|_| { + ArrowError::OutOfSpec("size is not a valid integer".to_string()) })?; - if bit_width != 128 { - return Err(ArrowError::OutOfSpec( - "Decimal256 is not supported".to_string(), - )); - } + DataType::FixedSizeBinary(size) } - let precision = parts[0].parse::().map_err(|_| { - ArrowError::OutOfSpec("Decimal precision is not a valid integer".to_string()) - })?; - let scale = parts[1].parse::().map_err(|_| { - ArrowError::OutOfSpec("Decimal scale is not a valid integer".to_string()) - })?; - DataType::Decimal(precision, scale) - } else if !parts.is_empty() && ((parts[0] == "+us") || (parts[0] == "+ud")) { - // union - let mode = UnionMode::sparse(parts[0] == "+us"); - let type_ids = parts[1] - .split(',') - .map(|x| { - x.parse::().map_err(|_| { + ["+w", size_raw] => { + // Example: "+w:123" fixed-sized list [123 items] + let size = size_raw.parse::().map_err(|_| { + ArrowError::OutOfSpec("size is not a valid integer".to_string()) + })?; + let child = to_field(schema.child(0))?; + DataType::FixedSizeList(Box::new(child), size) + } + ["d", raw] => { + // Decimal + let (precision, scale) = match raw.split(',').collect::>()[..] { + [precision_raw, scale_raw] => { + // Example: "d:19,10" decimal128 [precision 19, scale 10] + (precision_raw, scale_raw) + } + [precision_raw, scale_raw, width_raw] => { + // Example: "d:19,10,NNN" decimal bitwidth = NNN [precision 19, scale 10] + // Only bitwdth of 128 currently supported + let bit_width = width_raw.parse::().map_err(|_| { + ArrowError::OutOfSpec( + "Decimal bit width is not a valid integer".to_string(), + ) + })?; + if bit_width != 128 { + return Err(ArrowError::OutOfSpec( + "Decimal256 is not supported".to_string(), + )); + } + (precision_raw, scale_raw) + } + _ => { + return Err(ArrowError::OutOfSpec( + "Decimal must contain 2 or 3 comma-separated values".to_string(), + )); + } + }; + + DataType::Decimal( + precision.parse::().map_err(|_| { + ArrowError::OutOfSpec( + "Decimal precision is not a valid integer".to_string(), + ) + })?, + scale.parse::().map_err(|_| { ArrowError::OutOfSpec( - "Union type id is not a valid integer".to_string(), + "Decimal scale is not a valid integer".to_string(), ) + })?, + ) + } + [union_type @ "+us", union_parts] | [union_type @ "+ud", union_parts] => { + // union, sparse + // Example "+us:I,J,..." sparse union with type ids I,J... + // Example: "+ud:I,J,..." dense union with type ids I,J... + let mode = UnionMode::sparse(union_type == "+us"); + let type_ids = union_parts + .split(',') + .map(|x| { + x.parse::().map_err(|_| { + ArrowError::OutOfSpec( + "Union type id is not a valid integer".to_string(), + ) + }) }) - }) - .collect::>>()?; - let fields = (0..schema.n_children as usize) - .map(|x| to_field(schema.child(x))) - .collect::>>()?; - DataType::Union(fields, Some(type_ids), mode) - } else { - return Err(ArrowError::OutOfSpec(format!( - "The datatype \"{}\" is still not supported in Rust implementation", - other - ))); + .collect::>>()?; + let fields = (0..schema.n_children as usize) + .map(|x| to_field(schema.child(x))) + .collect::>>()?; + DataType::Union(fields, Some(type_ids), mode) + } + _ => { + return Err(ArrowError::OutOfSpec(format!( + "The datatype \"{}\" is still not supported in Rust implementation", + other + ))); + } } } }) @@ -511,3 +539,82 @@ unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, let extension = extension_name.map(|name| (name, extension_metadata)); (result, extension) } + +#[cfg(test)] +mod tests { + use crate::datatypes::{DataType, Field, TimeUnit}; + + macro_rules! test_dt_conv { + ($name:ident, $field:expr, $expected:expr,) => { + #[test] + fn $name() { + use crate::ffi::ArrowSchema; + + let field = $field; + let schema = ArrowSchema::new(&field); + let dt = unsafe { super::to_data_type(&schema).unwrap() }; + let result = super::to_format(&dt); + assert_eq!(result, $expected); + } + }; + } + + test_dt_conv!( + test_bool, + Field::new("example", DataType::Boolean, false), + "b", + ); + + // Timezone handling + test_dt_conv!( + test_timestamp_none, + Field::new( + "example", + DataType::Timestamp(TimeUnit::Second, None), + false + ), + "tss:", + ); + + test_dt_conv!( + test_timestamp_something, + Field::new( + "example", + DataType::Timestamp(TimeUnit::Second, Some("Nonsense timezone!!".to_string())), + false + ), + "tss:Nonsense timezone!!", + ); + + test_dt_conv!( + test_timestamp_empty, + Field::new( + "example", + DataType::Timestamp(TimeUnit::Second, Some(" ".to_string())), + false + ), + "tss: ", + ); + + // Other time stamp types + test_dt_conv!( + test_timestamp_nanoseconds, + Field::new( + "example", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false + ), + "tsn:", + ); + + // Other time stamp types + test_dt_conv!( + test_list, + Field::new( + "example", + DataType::List(Box::new(Field::new("example", DataType::Boolean, false))), + false + ), + "+l", + ); +}