Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Treat empty timezone string as no-timezone (#1009)
Browse files Browse the repository at this point in the history
  • Loading branch information
dbr authored May 26, 2022
1 parent 5725fa3 commit ed4734f
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 66 deletions.
6 changes: 3 additions & 3 deletions src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use crate::array::Array;
use crate::datatypes::{IntervalUnit, TimeUnit};
use crate::types::{days_ms, months_days_ns};

use super::super::super::temporal_conversions;
use super::super::super::types::NativeType;
use super::super::fmt::write_vec;
use super::PrimitiveArray;
use crate::array::fmt::write_vec;
use crate::temporal_conversions;
use crate::types::NativeType;

macro_rules! dyn_primitive {
($array:expr, $ty:ty, $expr:expr) => {{
Expand Down
233 changes: 170 additions & 63 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,74 +288,102 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result<DataType> {
DataType::Struct(children)
}
other => {
let parts = other.split(':').collect::<Vec<_>>();
if parts.len() == 2 && parts[0] == "tss" {
DataType::Timestamp(TimeUnit::Second, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsm" {
DataType::Timestamp(TimeUnit::Millisecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsu" {
DataType::Timestamp(TimeUnit::Microsecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsn" {
DataType::Timestamp(TimeUnit::Nanosecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "w" {
let size = parts[1].parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("size is not a valid integer".to_string())
})?;
DataType::FixedSizeBinary(size)
} else if parts.len() == 2 && parts[0] == "+w" {
let size = parts[1].parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("size is not a valid integer".to_string())
})?;
let child = to_field(schema.child(0))?;
DataType::FixedSizeList(Box::new(child), size)
} else if parts.len() == 2 && parts[0] == "d" {
let parts = parts[1].split(',').collect::<Vec<_>>();
if parts.len() < 2 || parts.len() > 3 {
return Err(ArrowError::OutOfSpec(
"Decimal must contain 2 or 3 comma-separated values".to_string(),
));
};
if parts.len() == 3 {
let bit_width = parts[0].parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec(
"Decimal bit width is not a valid integer".to_string(),
)
match other.split(':').collect::<Vec<_>>()[..] {
// Timestamps with no timezone
["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),

// Timestamps with timezone
["tss", tz] => DataType::Timestamp(TimeUnit::Second, Some(tz.to_string())),
["tsm", tz] => DataType::Timestamp(TimeUnit::Millisecond, Some(tz.to_string())),
["tsu", tz] => DataType::Timestamp(TimeUnit::Microsecond, Some(tz.to_string())),
["tsn", tz] => DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.to_string())),

["w", size_raw] => {
// Example: "w:42" fixed-width binary [42 bytes]
let size = size_raw.parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("size is not a valid integer".to_string())
})?;
if bit_width != 128 {
return Err(ArrowError::OutOfSpec(
"Decimal256 is not supported".to_string(),
));
}
DataType::FixedSizeBinary(size)
}
let precision = parts[0].parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("Decimal precision is not a valid integer".to_string())
})?;
let scale = parts[1].parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("Decimal scale is not a valid integer".to_string())
})?;
DataType::Decimal(precision, scale)
} else if !parts.is_empty() && ((parts[0] == "+us") || (parts[0] == "+ud")) {
// union
let mode = UnionMode::sparse(parts[0] == "+us");
let type_ids = parts[1]
.split(',')
.map(|x| {
x.parse::<i32>().map_err(|_| {
["+w", size_raw] => {
// Example: "+w:123" fixed-sized list [123 items]
let size = size_raw.parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec("size is not a valid integer".to_string())
})?;
let child = to_field(schema.child(0))?;
DataType::FixedSizeList(Box::new(child), size)
}
["d", raw] => {
// Decimal
let (precision, scale) = match raw.split(',').collect::<Vec<_>>()[..] {
[precision_raw, scale_raw] => {
// Example: "d:19,10" decimal128 [precision 19, scale 10]
(precision_raw, scale_raw)
}
[precision_raw, scale_raw, width_raw] => {
// Example: "d:19,10,NNN" decimal bitwidth = NNN [precision 19, scale 10]
// Only bitwdth of 128 currently supported
let bit_width = width_raw.parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec(
"Decimal bit width is not a valid integer".to_string(),
)
})?;
if bit_width != 128 {
return Err(ArrowError::OutOfSpec(
"Decimal256 is not supported".to_string(),
));
}
(precision_raw, scale_raw)
}
_ => {
return Err(ArrowError::OutOfSpec(
"Decimal must contain 2 or 3 comma-separated values".to_string(),
));
}
};

DataType::Decimal(
precision.parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec(
"Decimal precision is not a valid integer".to_string(),
)
})?,
scale.parse::<usize>().map_err(|_| {
ArrowError::OutOfSpec(
"Union type id is not a valid integer".to_string(),
"Decimal scale is not a valid integer".to_string(),
)
})?,
)
}
[union_type @ "+us", union_parts] | [union_type @ "+ud", union_parts] => {
// union, sparse
// Example "+us:I,J,..." sparse union with type ids I,J...
// Example: "+ud:I,J,..." dense union with type ids I,J...
let mode = UnionMode::sparse(union_type == "+us");
let type_ids = union_parts
.split(',')
.map(|x| {
x.parse::<i32>().map_err(|_| {
ArrowError::OutOfSpec(
"Union type id is not a valid integer".to_string(),
)
})
})
})
.collect::<Result<Vec<_>>>()?;
let fields = (0..schema.n_children as usize)
.map(|x| to_field(schema.child(x)))
.collect::<Result<Vec<_>>>()?;
DataType::Union(fields, Some(type_ids), mode)
} else {
return Err(ArrowError::OutOfSpec(format!(
"The datatype \"{}\" is still not supported in Rust implementation",
other
)));
.collect::<Result<Vec<_>>>()?;
let fields = (0..schema.n_children as usize)
.map(|x| to_field(schema.child(x)))
.collect::<Result<Vec<_>>>()?;
DataType::Union(fields, Some(type_ids), mode)
}
_ => {
return Err(ArrowError::OutOfSpec(format!(
"The datatype \"{}\" is still not supported in Rust implementation",
other
)));
}
}
}
})
Expand Down Expand Up @@ -511,3 +539,82 @@ unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata,
let extension = extension_name.map(|name| (name, extension_metadata));
(result, extension)
}

#[cfg(test)]
mod tests {
use crate::datatypes::{DataType, Field, TimeUnit};

macro_rules! test_dt_conv {
($name:ident, $field:expr, $expected:expr,) => {
#[test]
fn $name() {
use crate::ffi::ArrowSchema;

let field = $field;
let schema = ArrowSchema::new(&field);
let dt = unsafe { super::to_data_type(&schema).unwrap() };
let result = super::to_format(&dt);
assert_eq!(result, $expected);
}
};
}

test_dt_conv!(
test_bool,
Field::new("example", DataType::Boolean, false),
"b",
);

// Timezone handling
test_dt_conv!(
test_timestamp_none,
Field::new(
"example",
DataType::Timestamp(TimeUnit::Second, None),
false
),
"tss:",
);

test_dt_conv!(
test_timestamp_something,
Field::new(
"example",
DataType::Timestamp(TimeUnit::Second, Some("Nonsense timezone!!".to_string())),
false
),
"tss:Nonsense timezone!!",
);

test_dt_conv!(
test_timestamp_empty,
Field::new(
"example",
DataType::Timestamp(TimeUnit::Second, Some(" ".to_string())),
false
),
"tss: ",
);

// Other time stamp types
test_dt_conv!(
test_timestamp_nanoseconds,
Field::new(
"example",
DataType::Timestamp(TimeUnit::Nanosecond, None),
false
),
"tsn:",
);

// Other time stamp types
test_dt_conv!(
test_list,
Field::new(
"example",
DataType::List(Box::new(Field::new("example", DataType::Boolean, false))),
false
),
"+l",
);
}

0 comments on commit ed4734f

Please sign in to comment.