From 3cd508972b94abe331628739a5ea622299716542 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 18 Aug 2021 07:10:38 +0000 Subject: [PATCH] Fixed errors in cast. --- src/compute/cast/mod.rs | 655 +-------------------------------------- tests/it/compute/cast.rs | 635 +++++++++++++++++++++++++++++++++++++ tests/it/compute/mod.rs | 1 + tests/it/main.rs | 2 + 4 files changed, 646 insertions(+), 647 deletions(-) create mode 100644 tests/it/compute/cast.rs create mode 100644 tests/it/compute/mod.rs diff --git a/src/compute/cast/mod.rs b/src/compute/cast/mod.rs index 9d09e6bc755..eca849ea481 100644 --- a/src/compute/cast/mod.rs +++ b/src/compute/cast/mod.rs @@ -116,18 +116,22 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type), (_, Boolean) => is_numeric(from_type), - (Boolean, _) => is_numeric(to_type) || to_type == &Utf8, + (Boolean, _) => is_numeric(to_type) || to_type == &Utf8 || to_type == &LargeUtf8, (Utf8, Date32) => true, (Utf8, Date64) => true, (Utf8, Timestamp(TimeUnit::Nanosecond, None)) => true, (Utf8, LargeUtf8) => true, (Utf8, _) => is_numeric(to_type), + (LargeUtf8, Date32) => true, + (LargeUtf8, Date64) => true, (LargeUtf8, Timestamp(TimeUnit::Nanosecond, None)) => true, (LargeUtf8, Utf8) => true, (LargeUtf8, _) => is_numeric(to_type), (_, Utf8) => is_numeric(from_type) || from_type == &Binary, (_, LargeUtf8) => is_numeric(from_type) || from_type == &Binary, + (Binary, LargeBinary) => true, + (LargeBinary, Binary) => true, // start numeric casts (UInt8, UInt16) => true, @@ -356,6 +360,7 @@ fn cast_with_options( let as_options = options.with_wrapped(true); match (from_type, to_type) { + (Null, Int32) => Ok(new_null_array(to_type.clone(), array.len())), (Struct(_), _) => Err(ArrowError::NotYetImplemented( "Cannot cast from struct to other types".to_string(), )), @@ -375,7 +380,7 @@ fn cast_with_options( to_type, )) .map(|x| Box::new(x) as Box), - (List(lhs), LargeList(rhs)) if lhs == rhs => Ok(cast_large_to_list( + (LargeList(lhs), List(rhs)) if lhs == rhs => Ok(cast_large_to_list( array.as_any().downcast_ref().unwrap(), to_type, )) @@ -388,8 +393,7 @@ fn cast_with_options( let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(0..=array.len() as i32) }; - let data_type = ListArray::::default_datatype(to.data_type().clone()); - let list_array = ListArray::::from_data(data_type, offsets, values, None); + let list_array = ListArray::::from_data(to_type.clone(), offsets, values, None); Ok(Box::new(list_array)) } @@ -750,646 +754,3 @@ fn cast_to_dictionary( ))), } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::NativeType; - - #[test] - fn i32_to_f64() { - let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - let b = cast(&array, &DataType::Float64).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - assert!((5.0 - c.value(0)).abs() < f64::EPSILON); - assert!((6.0 - c.value(1)).abs() < f64::EPSILON); - assert!((7.0 - c.value(2)).abs() < f64::EPSILON); - assert!((8.0 - c.value(3)).abs() < f64::EPSILON); - assert!((9.0 - c.value(4)).abs() < f64::EPSILON); - } - - #[test] - fn i32_as_f64_no_overflow() { - let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - let b = wrapping_cast(&array, &DataType::Float64).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - assert!((5.0 - c.value(0)).abs() < f64::EPSILON); - assert!((6.0 - c.value(1)).abs() < f64::EPSILON); - assert!((7.0 - c.value(2)).abs() < f64::EPSILON); - assert!((8.0 - c.value(3)).abs() < f64::EPSILON); - assert!((9.0 - c.value(4)).abs() < f64::EPSILON); - } - - #[test] - fn u16_as_u8_overflow() { - let array = UInt16Array::from_slice(&[255, 256, 257, 258, 259]); - let b = wrapping_cast(&array, &DataType::UInt8).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - let values = c.values().as_slice(); - - println!("{}", 255u8.wrapping_add(10)); - - assert_eq!(values, &[255, 0, 1, 2, 3]) - } - - #[test] - fn u16_as_u8_no_overflow() { - let array = UInt16Array::from_slice(&[1, 2, 3, 4, 5]); - let b = wrapping_cast(&array, &DataType::UInt8).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - let values = c.values().as_slice(); - assert_eq!(values, &[1, 2, 3, 4, 5]) - } - - #[test] - fn float_range_max() { - //floats to integers - let u: Option = num::cast(f32::MAX); - assert_eq!(u, None); - let u: Option = num::cast(f32::MAX); - assert_eq!(u, None); - - let u: Option = num::cast(f32::MAX); - assert_eq!(u, None); - let u: Option = num::cast(f32::MAX); - assert_eq!(u, None); - - let u: Option = num::cast(f64::MAX); - assert_eq!(u, None); - let u: Option = num::cast(f64::MAX); - assert_eq!(u, None); - - let u: Option = num::cast(f64::MAX); - assert_eq!(u, None); - let u: Option = num::cast(f64::MAX); - assert_eq!(u, None); - - //integers to floats - let u: Option = num::cast(u32::MAX); - assert!(u.is_some()); - let u: Option = num::cast(u32::MAX); - assert!(u.is_some()); - - let u: Option = num::cast(i32::MAX); - assert!(u.is_some()); - let u: Option = num::cast(i32::MAX); - assert!(u.is_some()); - - let u: Option = num::cast(i64::MAX); - assert!(u.is_some()); - let u: Option = num::cast(u64::MAX); - assert!(u.is_some()); - - let u: Option = num::cast(f32::MAX); - assert!(u.is_some()); - } - - #[test] - fn float_range_min() { - //floats to integers - let u: Option = num::cast(f32::MIN); - assert_eq!(u, None); - let u: Option = num::cast(f32::MIN); - assert_eq!(u, None); - - let u: Option = num::cast(f32::MIN); - assert_eq!(u, None); - let u: Option = num::cast(f32::MIN); - assert_eq!(u, None); - - let u: Option = num::cast(f64::MIN); - assert_eq!(u, None); - let u: Option = num::cast(f64::MIN); - assert_eq!(u, None); - - let u: Option = num::cast(f64::MIN); - assert_eq!(u, None); - let u: Option = num::cast(f64::MIN); - assert_eq!(u, None); - - //integers to floats - let u: Option = num::cast(u32::MIN); - assert!(u.is_some()); - let u: Option = num::cast(u32::MIN); - assert!(u.is_some()); - - let u: Option = num::cast(i32::MIN); - assert!(u.is_some()); - let u: Option = num::cast(i32::MIN); - assert!(u.is_some()); - - let u: Option = num::cast(i64::MIN); - assert!(u.is_some()); - let u: Option = num::cast(u64::MIN); - assert!(u.is_some()); - - let u: Option = num::cast(f32::MIN); - assert!(u.is_some()); - } - - #[test] - fn f32_as_u8_overflow() { - let array = Float32Array::from_slice(&[1.1, 5000.0]); - let b = cast(&array, &DataType::UInt8).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - let expected = UInt8Array::from(&[Some(1), None]); - assert_eq!(c, &expected); - - let b = cast_with_options(&array, &DataType::UInt8, CastOptions { wrapped: true }).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - let expected = UInt8Array::from(&[Some(1), Some(255)]); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_u8() { - let array = Int32Array::from_slice(&[-5, 6, -7, 8, 100000000]); - let b = cast(&array, &DataType::UInt8).unwrap(); - let expected = UInt8Array::from(&[None, Some(6), None, Some(8), None]); - let c = b.as_any().downcast_ref::().unwrap(); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_u8_sliced() { - let array = Int32Array::from_slice(&[-5, 6, -7, 8, 100000000]); - let array = array.slice(2, 3); - let b = cast(&array, &DataType::UInt8).unwrap(); - let expected = UInt8Array::from(&[None, Some(8), None]); - let c = b.as_any().downcast_ref::().unwrap(); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_i32() { - let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - let b = cast(&array, &DataType::Int32).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - - let expected = &[5, 6, 7, 8, 9]; - let expected = Int32Array::from_slice(expected); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_list_i32() { - let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - let b = cast( - &array, - &DataType::List(Box::new(Field::new("item", DataType::Int32, true))), - ) - .unwrap(); - - let arr = b.as_any().downcast_ref::>().unwrap(); - assert_eq!(&[0, 1, 2, 3, 4, 5], arr.offsets().as_slice()); - let values = arr.values(); - let c = values - .as_any() - .downcast_ref::>() - .unwrap(); - - let expected = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_list_i32_nullable() { - let input = [Some(5), None, Some(7), Some(8), Some(9)]; - - let array = Int32Array::from(input); - let b = cast( - &array, - &DataType::List(Box::new(Field::new("item", DataType::Int32, true))), - ) - .unwrap(); - - let arr = b.as_any().downcast_ref::>().unwrap(); - assert_eq!(&[0, 1, 2, 3, 4, 5], arr.offsets().as_slice()); - let values = arr.values(); - let c = values.as_any().downcast_ref::().unwrap(); - - let expected = &[Some(5), None, Some(7), Some(8), Some(9)]; - let expected = Int32Array::from(expected); - assert_eq!(c, &expected); - } - - #[test] - fn i32_to_list_f64_nullable_sliced() { - let input = [Some(5), None, Some(7), Some(8), None, Some(10)]; - - let array = Int32Array::from(input); - - let array = array.slice(2, 4); - let b = cast( - &array, - &DataType::List(Box::new(Field::new("item", DataType::Float64, true))), - ) - .unwrap(); - - let arr = b.as_any().downcast_ref::>().unwrap(); - assert_eq!(&[0, 1, 2, 3, 4], arr.offsets().as_slice()); - let values = arr.values(); - let c = values.as_any().downcast_ref::().unwrap(); - - let expected = &[Some(7.0), Some(8.0), None, Some(10.0)]; - let expected = Float64Array::from(expected); - assert_eq!(c, &expected); - } - - #[test] - fn utf8_to_i32() { - let array = Utf8Array::::from_slice(&["5", "6", "seven", "8", "9.1"]); - let b = cast(&array, &DataType::Int32).unwrap(); - let c = b.as_any().downcast_ref::>().unwrap(); - - let expected = &[Some(5), Some(6), None, Some(8), None]; - let expected = Int32Array::from(expected); - assert_eq!(c, &expected); - } - - #[test] - fn bool_to_i32() { - let array = BooleanArray::from(vec![Some(true), Some(false), None]); - let b = cast(&array, &DataType::Int32).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - - let expected = &[Some(1), Some(0), None]; - let expected = Int32Array::from(expected); - assert_eq!(c, &expected); - } - - #[test] - fn bool_to_f64() { - let array = BooleanArray::from(vec![Some(true), Some(false), None]); - let b = cast(&array, &DataType::Float64).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - - let expected = &[Some(1.0), Some(0.0), None]; - let expected = Float64Array::from(expected); - assert_eq!(c, &expected); - } - - #[test] - #[should_panic(expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported")] - fn int32_to_timestamp() { - let array = Int32Array::from(&[Some(2), Some(10), None]); - cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); - } - - #[test] - fn consistency() { - use crate::datatypes::DataType::*; - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - List(Box::new(Field::new("a", Utf8, true))), - LargeList(Box::new(Field::new("a", Utf8, true))), - ]; - datatypes - .clone() - .into_iter() - .zip(datatypes.into_iter()) - .for_each(|(d1, d2)| { - let array = new_null_array(d1.clone(), 10); - if can_cast_types(&d1, &d2) { - let result = cast(array.as_ref(), &d2); - if let Ok(result) = result { - assert_eq!(result.data_type(), &d2); - } else { - panic!("Cast should have not failed") - } - } else { - assert!( - cast_with_options(array.as_ref(), &d2, CastOptions::default()).is_err() - ); - } - }); - } - - fn test_primitive_to_primitive( - lhs: &[I], - lhs_type: DataType, - expected: &[O], - expected_type: DataType, - ) { - let a = PrimitiveArray::::from_slice(lhs).to(lhs_type); - let b = cast(&a, &expected_type).unwrap(); - let b = b.as_any().downcast_ref::>().unwrap(); - let expected = PrimitiveArray::::from_slice(expected).to(expected_type); - assert_eq!(b, &expected); - } - - #[test] - fn date32_to_date64() { - test_primitive_to_primitive( - &[10000i32, 17890], - DataType::Date32, - &[864000000000i64, 1545696000000], - DataType::Date64, - ); - } - - #[test] - fn date64_to_date32() { - test_primitive_to_primitive( - &[864000000005i64, 1545696000001], - DataType::Date64, - &[10000i32, 17890], - DataType::Date32, - ); - } - - #[test] - fn date32_to_int32() { - test_primitive_to_primitive( - &[10000i32, 17890], - DataType::Date32, - &[10000i32, 17890], - DataType::Int32, - ); - } - - #[test] - fn int32_to_date32() { - test_primitive_to_primitive( - &[10000i32, 17890], - DataType::Int32, - &[10000i32, 17890], - DataType::Date32, - ); - } - - #[test] - fn timestamp_to_date32() { - test_primitive_to_primitive( - &[864000000005i64, 1545696000001], - DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), - &[10000i32, 17890], - DataType::Date32, - ); - } - - #[test] - fn timestamp_to_date64() { - test_primitive_to_primitive( - &[864000000005i64, 1545696000001], - DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), - &[864000000005i64, 1545696000001i64], - DataType::Date64, - ); - } - - #[test] - fn timestamp_to_i64() { - test_primitive_to_primitive( - &[864000000005i64, 1545696000001], - DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), - &[864000000005i64, 1545696000001i64], - DataType::Int64, - ); - } - - #[test] - fn timestamp_to_timestamp() { - test_primitive_to_primitive( - &[864000003005i64, 1545696002001], - DataType::Timestamp(TimeUnit::Millisecond, None), - &[864000003i64, 1545696002], - DataType::Timestamp(TimeUnit::Second, None), - ); - } - - #[test] - fn utf8_to_dict() { - let array = Utf8Array::::from(&[Some("one"), None, Some("three"), Some("one")]); - - // Cast to a dictionary (same value type, Utf8) - let cast_type = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)); - let result = cast(&array, &cast_type).expect("cast failed"); - - let mut expected = MutableDictionaryArray::>::new(); - expected - .try_extend([Some("one"), None, Some("three"), Some("one")]) - .unwrap(); - let expected: DictionaryArray = expected.into(); - assert_eq!(expected, result.as_ref()); - } - - #[test] - fn dict_to_utf8() { - let mut array = MutableDictionaryArray::>::new(); - array - .try_extend([Some("one"), None, Some("three"), Some("one")]) - .unwrap(); - let array: DictionaryArray = array.into(); - - let result = cast(&array, &DataType::Utf8).expect("cast failed"); - - let expected = Utf8Array::::from(&[Some("one"), None, Some("three"), Some("one")]); - - assert_eq!(expected, result.as_ref()); - } - - #[test] - fn i32_to_dict() { - let array = Int32Array::from(&[Some(1), None, Some(3), Some(1)]); - - // Cast to a dictionary (same value type, Utf8) - let cast_type = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Int32)); - let result = cast(&array, &cast_type).expect("cast failed"); - - let mut expected = MutableDictionaryArray::>::new(); - expected - .try_extend([Some(1), None, Some(3), Some(1)]) - .unwrap(); - let expected: DictionaryArray = expected.into(); - assert_eq!(expected, result.as_ref()); - } - - #[test] - fn list_to_list() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(4), None, Some(6)]), - ]; - - let expected_data = data - .iter() - .map(|x| x.as_ref().map(|x| x.iter().map(|x| x.map(|x| x as u16)))); - - let mut array = MutableListArray::>::new(); - array.try_extend(data.clone()).unwrap(); - let array: ListArray = array.into(); - - let mut expected = MutableListArray::>::new(); - expected.try_extend(expected_data).unwrap(); - let expected: ListArray = expected.into(); - - let result = cast(&array, expected.data_type()).unwrap(); - assert_eq!(expected, result.as_ref()); - } - - /* - #[test] - fn dict_to_dict_bad_index_value_primitive() { - use DataType::*; - // test converting from an array that has indexes of a type - // that are out of bounds for a particular other kind of - // index. - - let keys_builder = PrimitiveBuilder::::new(10); - let values_builder = PrimitiveBuilder::::new(10); - let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); - - // add 200 distinct values (which can be stored by a - // dictionary indexed by int32, but not a dictionary indexed - // with int8) - for i in 0..200 { - builder.append(i).unwrap(); - } - let array: ArrayRef = Arc::new(builder.finish()); - - let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - let res = cast_with_options(&array, &cast_type); - assert, CastOptions::default())!(res.is_err()); - let actual_error = format!("{:?}", res); - let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8"; - assert!( - actual_error.contains(expected_error), - "did not find expected error '{}' in actual error '{}'", - actual_error, - expected_error - ); - } - - #[test] - fn dict_to_dict_bad_index_value_utf8() { - use DataType::*; - // Same test as dict_to_dict_bad_index_value but use - // string values (and encode the expected behavior here); - - let keys_builder = PrimitiveBuilder::::new(10); - let values_builder = StringBuilder::new(10); - let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); - - // add 200 distinct values (which can be stored by a - // dictionary indexed by int32, but not a dictionary indexed - // with int8) - for i in 0..200 { - let val = format!("val{}", i); - builder.append(&val).unwrap(); - } - let array: ArrayRef = Arc::new(builder.finish()); - - let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - let res = cast_with_options(&array, &cast_type); - assert, CastOptions::default())!(res.is_err()); - let actual_error = format!("{:?}", res); - let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8"; - assert!( - actual_error.contains(expected_error), - "did not find expected error '{}' in actual error '{}'", - actual_error, - expected_error - ); - } - - #[test] - fn utf8_to_date32() { - use chrono::NaiveDate; - let from_ymd = chrono::NaiveDate::from_ymd; - let since = chrono::NaiveDate::signed_duration_since; - - let a = StringArray::from(vec![ - "2000-01-01", // valid date with leading 0s - "2000-2-2", // valid date without leading 0s - "2000-00-00", // invalid month and day - "2000-01-01T12:00:00", // date + time is invalid - "2000", // just a year is invalid - ]); - let array = Arc::new(a) as ArrayRef; - let b = cast_with_options(&array, &DataType::Date32, CastOptions::default()).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - - // test valid inputs - let date_value = since(NaiveDate::from_ymd(2000, 1, 1), from_ymd(1970, 1, 1)) - .num_days() as i32; - assert_eq!(true, c.is_valid(0)); // "2000-01-01" - assert_eq!(date_value, c.value(0)); - - let date_value = since(NaiveDate::from_ymd(2000, 2, 2), from_ymd(1970, 1, 1)) - .num_days() as i32; - assert_eq!(true, c.is_valid(1)); // "2000-2-2" - assert_eq!(date_value, c.value(1)); - - // test invalid inputs - assert_eq!(false, c.is_valid(2)); // "2000-00-00" - assert_eq!(false, c.is_valid(3)); // "2000-01-01T12:00:00" - assert_eq!(false, c.is_valid(4)); // "2000" - } - - #[test] - fn utf8_to_date64() { - let a = StringArray::from(vec![ - "2000-01-01T12:00:00", // date + time valid - "2020-12-15T12:34:56", // date + time valid - "2020-2-2T12:34:56", // valid date time without leading 0s - "2000-00-00T12:00:00", // invalid month and day - "2000-01-01 12:00:00", // missing the 'T' - "2000-01-01", // just a date is invalid - ]); - let array = Arc::new(a) as ArrayRef; - let b = cast_with_options(&array, &DataType::Date64, CastOptions::default()).unwrap(); - let c = b.as_any().downcast_ref::().unwrap(); - - // test valid inputs - assert_eq!(true, c.is_valid(0)); // "2000-01-01T12:00:00" - assert_eq!(946728000000, c.value(0)); - assert_eq!(true, c.is_valid(1)); // "2020-12-15T12:34:56" - assert_eq!(1608035696000, c.value(1)); - assert_eq!(true, c.is_valid(2)); // "2020-2-2T12:34:56" - assert_eq!(1580646896000, c.value(2)); - - // test invalid inputs - assert_eq!(false, c.is_valid(3)); // "2000-00-00T12:00:00" - assert_eq!(false, c.is_valid(4)); // "2000-01-01 12:00:00" - assert_eq!(false, c.is_valid(5)); // "2000-01-01" - } - - fn make_union_array() -> UnionArray { - let mut builder = UnionBuilder::new_dense(7); - builder.append::("a", 1).unwrap(); - builder.append::("b", 2).unwrap(); - builder.build().unwrap() - } - */ -} diff --git a/tests/it/compute/cast.rs b/tests/it/compute/cast.rs new file mode 100644 index 00000000000..8abb8afc29e --- /dev/null +++ b/tests/it/compute/cast.rs @@ -0,0 +1,635 @@ +use arrow2::array::*; +use arrow2::compute::cast::{can_cast_types, cast, wrapping_cast}; +use arrow2::datatypes::*; +use arrow2::types::NativeType; + +#[test] +fn i32_to_f64() { + let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + let b = cast(&array, &DataType::Float64).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert!((5.0 - c.value(0)).abs() < f64::EPSILON); + assert!((6.0 - c.value(1)).abs() < f64::EPSILON); + assert!((7.0 - c.value(2)).abs() < f64::EPSILON); + assert!((8.0 - c.value(3)).abs() < f64::EPSILON); + assert!((9.0 - c.value(4)).abs() < f64::EPSILON); +} + +#[test] +fn i32_as_f64_no_overflow() { + let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + let b = wrapping_cast(&array, &DataType::Float64).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert!((5.0 - c.value(0)).abs() < f64::EPSILON); + assert!((6.0 - c.value(1)).abs() < f64::EPSILON); + assert!((7.0 - c.value(2)).abs() < f64::EPSILON); + assert!((8.0 - c.value(3)).abs() < f64::EPSILON); + assert!((9.0 - c.value(4)).abs() < f64::EPSILON); +} + +#[test] +fn u16_as_u8_overflow() { + let array = UInt16Array::from_slice(&[255, 256, 257, 258, 259]); + let b = wrapping_cast(&array, &DataType::UInt8).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + let values = c.values().as_slice(); + + println!("{}", 255u8.wrapping_add(10)); + + assert_eq!(values, &[255, 0, 1, 2, 3]) +} + +#[test] +fn u16_as_u8_no_overflow() { + let array = UInt16Array::from_slice(&[1, 2, 3, 4, 5]); + let b = wrapping_cast(&array, &DataType::UInt8).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + let values = c.values().as_slice(); + assert_eq!(values, &[1, 2, 3, 4, 5]) +} + +#[test] +fn float_range_max() { + //floats to integers + let u: Option = num::cast(f32::MAX); + assert_eq!(u, None); + let u: Option = num::cast(f32::MAX); + assert_eq!(u, None); + + let u: Option = num::cast(f32::MAX); + assert_eq!(u, None); + let u: Option = num::cast(f32::MAX); + assert_eq!(u, None); + + let u: Option = num::cast(f64::MAX); + assert_eq!(u, None); + let u: Option = num::cast(f64::MAX); + assert_eq!(u, None); + + let u: Option = num::cast(f64::MAX); + assert_eq!(u, None); + let u: Option = num::cast(f64::MAX); + assert_eq!(u, None); + + //integers to floats + let u: Option = num::cast(u32::MAX); + assert!(u.is_some()); + let u: Option = num::cast(u32::MAX); + assert!(u.is_some()); + + let u: Option = num::cast(i32::MAX); + assert!(u.is_some()); + let u: Option = num::cast(i32::MAX); + assert!(u.is_some()); + + let u: Option = num::cast(i64::MAX); + assert!(u.is_some()); + let u: Option = num::cast(u64::MAX); + assert!(u.is_some()); + + let u: Option = num::cast(f32::MAX); + assert!(u.is_some()); +} + +#[test] +fn float_range_min() { + //floats to integers + let u: Option = num::cast(f32::MIN); + assert_eq!(u, None); + let u: Option = num::cast(f32::MIN); + assert_eq!(u, None); + + let u: Option = num::cast(f32::MIN); + assert_eq!(u, None); + let u: Option = num::cast(f32::MIN); + assert_eq!(u, None); + + let u: Option = num::cast(f64::MIN); + assert_eq!(u, None); + let u: Option = num::cast(f64::MIN); + assert_eq!(u, None); + + let u: Option = num::cast(f64::MIN); + assert_eq!(u, None); + let u: Option = num::cast(f64::MIN); + assert_eq!(u, None); + + //integers to floats + let u: Option = num::cast(u32::MIN); + assert!(u.is_some()); + let u: Option = num::cast(u32::MIN); + assert!(u.is_some()); + + let u: Option = num::cast(i32::MIN); + assert!(u.is_some()); + let u: Option = num::cast(i32::MIN); + assert!(u.is_some()); + + let u: Option = num::cast(i64::MIN); + assert!(u.is_some()); + let u: Option = num::cast(u64::MIN); + assert!(u.is_some()); + + let u: Option = num::cast(f32::MIN); + assert!(u.is_some()); +} + +#[test] +fn f32_as_u8_overflow() { + let array = Float32Array::from_slice(&[1.1, 5000.0]); + let b = cast(&array, &DataType::UInt8).unwrap(); + let expected = UInt8Array::from(&[Some(1), None]); + assert_eq!(expected, b.as_ref()); + + let b = wrapping_cast(&array, &DataType::UInt8).unwrap(); + let expected = UInt8Array::from(&[Some(1), Some(255)]); + assert_eq!(expected, b.as_ref()); +} + +#[test] +fn i32_to_u8() { + let array = Int32Array::from_slice(&[-5, 6, -7, 8, 100000000]); + let b = cast(&array, &DataType::UInt8).unwrap(); + let expected = UInt8Array::from(&[None, Some(6), None, Some(8), None]); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(c, &expected); +} + +#[test] +fn i32_to_u8_sliced() { + let array = Int32Array::from_slice(&[-5, 6, -7, 8, 100000000]); + let array = array.slice(2, 3); + let b = cast(&array, &DataType::UInt8).unwrap(); + let expected = UInt8Array::from(&[None, Some(8), None]); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(c, &expected); +} + +#[test] +fn i32_to_i32() { + let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + let b = cast(&array, &DataType::Int32).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + + let expected = &[5, 6, 7, 8, 9]; + let expected = Int32Array::from_slice(expected); + assert_eq!(c, &expected); +} + +#[test] +fn i32_to_list_i32() { + let array = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + let b = cast( + &array, + &DataType::List(Box::new(Field::new("item", DataType::Int32, true))), + ) + .unwrap(); + + let arr = b.as_any().downcast_ref::>().unwrap(); + assert_eq!(&[0, 1, 2, 3, 4, 5], arr.offsets().as_slice()); + let values = arr.values(); + let c = values + .as_any() + .downcast_ref::>() + .unwrap(); + + let expected = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + assert_eq!(c, &expected); +} + +#[test] +fn i32_to_list_i32_nullable() { + let input = [Some(5), None, Some(7), Some(8), Some(9)]; + + let array = Int32Array::from(input); + let b = cast( + &array, + &DataType::List(Box::new(Field::new("item", DataType::Int32, true))), + ) + .unwrap(); + + let arr = b.as_any().downcast_ref::>().unwrap(); + assert_eq!(&[0, 1, 2, 3, 4, 5], arr.offsets().as_slice()); + let values = arr.values(); + let c = values.as_any().downcast_ref::().unwrap(); + + let expected = &[Some(5), None, Some(7), Some(8), Some(9)]; + let expected = Int32Array::from(expected); + assert_eq!(c, &expected); +} + +#[test] +fn i32_to_list_f64_nullable_sliced() { + let input = [Some(5), None, Some(7), Some(8), None, Some(10)]; + + let array = Int32Array::from(input); + + let array = array.slice(2, 4); + let b = cast( + &array, + &DataType::List(Box::new(Field::new("item", DataType::Float64, true))), + ) + .unwrap(); + + let arr = b.as_any().downcast_ref::>().unwrap(); + assert_eq!(&[0, 1, 2, 3, 4], arr.offsets().as_slice()); + let values = arr.values(); + let c = values.as_any().downcast_ref::().unwrap(); + + let expected = &[Some(7.0), Some(8.0), None, Some(10.0)]; + let expected = Float64Array::from(expected); + assert_eq!(c, &expected); +} + +#[test] +fn utf8_to_i32() { + let array = Utf8Array::::from_slice(&["5", "6", "seven", "8", "9.1"]); + let b = cast(&array, &DataType::Int32).unwrap(); + let c = b.as_any().downcast_ref::>().unwrap(); + + let expected = &[Some(5), Some(6), None, Some(8), None]; + let expected = Int32Array::from(expected); + assert_eq!(c, &expected); +} + +#[test] +fn bool_to_i32() { + let array = BooleanArray::from(vec![Some(true), Some(false), None]); + let b = cast(&array, &DataType::Int32).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + + let expected = &[Some(1), Some(0), None]; + let expected = Int32Array::from(expected); + assert_eq!(c, &expected); +} + +#[test] +fn bool_to_f64() { + let array = BooleanArray::from(vec![Some(true), Some(false), None]); + let b = cast(&array, &DataType::Float64).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + + let expected = &[Some(1.0), Some(0.0), None]; + let expected = Float64Array::from(expected); + assert_eq!(c, &expected); +} + +#[test] +#[should_panic(expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported")] +fn int32_to_timestamp() { + let array = Int32Array::from(&[Some(2), Some(10), None]); + cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); +} + +#[test] +fn consistency() { + use DataType::*; + let datatypes = vec![ + Null, + Boolean, + UInt8, + UInt16, + UInt32, + UInt64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Timestamp(TimeUnit::Second, None), + Timestamp(TimeUnit::Millisecond, None), + Timestamp(TimeUnit::Microsecond, None), + Timestamp(TimeUnit::Nanosecond, None), + Time64(TimeUnit::Microsecond), + Time64(TimeUnit::Nanosecond), + Date32, + Time32(TimeUnit::Second), + Time32(TimeUnit::Millisecond), + Date64, + Utf8, + LargeUtf8, + Binary, + LargeBinary, + Duration(TimeUnit::Second), + Duration(TimeUnit::Millisecond), + Duration(TimeUnit::Microsecond), + Duration(TimeUnit::Nanosecond), + List(Box::new(Field::new("a", Utf8, true))), + LargeList(Box::new(Field::new("a", Utf8, true))), + ]; + for d1 in &datatypes { + for d2 in &datatypes { + let array = new_null_array(d1.clone(), 10); + if can_cast_types(d1, d2) { + let result = cast(array.as_ref(), d2); + if let Ok(result) = result { + assert_eq!(result.data_type(), d2, "type not equal: {:?} {:?}", d1, d2); + } else { + panic!("Cast should have not failed {:?} {:?}", d1, d2); + } + } else if cast(array.as_ref(), d2).is_ok() { + panic!("Cast should have failed {:?} {:?}", d1, d2); + } + } + } +} + +fn test_primitive_to_primitive( + lhs: &[I], + lhs_type: DataType, + expected: &[O], + expected_type: DataType, +) { + let a = PrimitiveArray::::from_slice(lhs).to(lhs_type); + let b = cast(&a, &expected_type).unwrap(); + let b = b.as_any().downcast_ref::>().unwrap(); + let expected = PrimitiveArray::::from_slice(expected).to(expected_type); + assert_eq!(b, &expected); +} + +#[test] +fn date32_to_date64() { + test_primitive_to_primitive( + &[10000i32, 17890], + DataType::Date32, + &[864000000000i64, 1545696000000], + DataType::Date64, + ); +} + +#[test] +fn date64_to_date32() { + test_primitive_to_primitive( + &[864000000005i64, 1545696000001], + DataType::Date64, + &[10000i32, 17890], + DataType::Date32, + ); +} + +#[test] +fn date32_to_int32() { + test_primitive_to_primitive( + &[10000i32, 17890], + DataType::Date32, + &[10000i32, 17890], + DataType::Int32, + ); +} + +#[test] +fn int32_to_date32() { + test_primitive_to_primitive( + &[10000i32, 17890], + DataType::Int32, + &[10000i32, 17890], + DataType::Date32, + ); +} + +#[test] +fn timestamp_to_date32() { + test_primitive_to_primitive( + &[864000000005i64, 1545696000001], + DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), + &[10000i32, 17890], + DataType::Date32, + ); +} + +#[test] +fn timestamp_to_date64() { + test_primitive_to_primitive( + &[864000000005i64, 1545696000001], + DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), + &[864000000005i64, 1545696000001i64], + DataType::Date64, + ); +} + +#[test] +fn timestamp_to_i64() { + test_primitive_to_primitive( + &[864000000005i64, 1545696000001], + DataType::Timestamp(TimeUnit::Millisecond, Some(String::from("UTC"))), + &[864000000005i64, 1545696000001i64], + DataType::Int64, + ); +} + +#[test] +fn timestamp_to_timestamp() { + test_primitive_to_primitive( + &[864000003005i64, 1545696002001], + DataType::Timestamp(TimeUnit::Millisecond, None), + &[864000003i64, 1545696002], + DataType::Timestamp(TimeUnit::Second, None), + ); +} + +#[test] +fn utf8_to_dict() { + let array = Utf8Array::::from(&[Some("one"), None, Some("three"), Some("one")]); + + // Cast to a dictionary (same value type, Utf8) + let cast_type = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)); + let result = cast(&array, &cast_type).expect("cast failed"); + + let mut expected = MutableDictionaryArray::>::new(); + expected + .try_extend([Some("one"), None, Some("three"), Some("one")]) + .unwrap(); + let expected: DictionaryArray = expected.into(); + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn dict_to_utf8() { + let mut array = MutableDictionaryArray::>::new(); + array + .try_extend([Some("one"), None, Some("three"), Some("one")]) + .unwrap(); + let array: DictionaryArray = array.into(); + + let result = cast(&array, &DataType::Utf8).expect("cast failed"); + + let expected = Utf8Array::::from(&[Some("one"), None, Some("three"), Some("one")]); + + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn i32_to_dict() { + let array = Int32Array::from(&[Some(1), None, Some(3), Some(1)]); + + // Cast to a dictionary (same value type, Utf8) + let cast_type = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Int32)); + let result = cast(&array, &cast_type).expect("cast failed"); + + let mut expected = MutableDictionaryArray::>::new(); + expected + .try_extend([Some(1), None, Some(3), Some(1)]) + .unwrap(); + let expected: DictionaryArray = expected.into(); + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn list_to_list() { + let data = vec![ + Some(vec![Some(1i32), Some(2), Some(3)]), + None, + Some(vec![Some(4), None, Some(6)]), + ]; + + let expected_data = data + .iter() + .map(|x| x.as_ref().map(|x| x.iter().map(|x| x.map(|x| x as u16)))); + + let mut array = MutableListArray::>::new(); + array.try_extend(data.clone()).unwrap(); + let array: ListArray = array.into(); + + let mut expected = MutableListArray::>::new(); + expected.try_extend(expected_data).unwrap(); + let expected: ListArray = expected.into(); + + let result = cast(&array, expected.data_type()).unwrap(); + assert_eq!(expected, result.as_ref()); +} + +/* +#[test] +fn dict_to_dict_bad_index_value_primitive() { + use DataType::*; + // test converting from an array that has indexes of a type + // that are out of bounds for a particular other kind of + // index. + + let keys_builder = PrimitiveBuilder::::new(10); + let values_builder = PrimitiveBuilder::::new(10); + let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); + + // add 200 distinct values (which can be stored by a + // dictionary indexed by int32, but not a dictionary indexed + // with int8) + for i in 0..200 { + builder.append(i).unwrap(); + } + let array: ArrayRef = Arc::new(builder.finish()); + + let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + let res = cast_with_options(&array, &cast_type); + assert, CastOptions::default())!(res.is_err()); + let actual_error = format!("{:?}", res); + let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8"; + assert!( + actual_error.contains(expected_error), + "did not find expected error '{}' in actual error '{}'", + actual_error, + expected_error + ); +} + +#[test] +fn dict_to_dict_bad_index_value_utf8() { + use DataType::*; + // Same test as dict_to_dict_bad_index_value but use + // string values (and encode the expected behavior here); + + let keys_builder = PrimitiveBuilder::::new(10); + let values_builder = StringBuilder::new(10); + let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); + + // add 200 distinct values (which can be stored by a + // dictionary indexed by int32, but not a dictionary indexed + // with int8) + for i in 0..200 { + let val = format!("val{}", i); + builder.append(&val).unwrap(); + } + let array: ArrayRef = Arc::new(builder.finish()); + + let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + let res = cast_with_options(&array, &cast_type); + assert, CastOptions::default())!(res.is_err()); + let actual_error = format!("{:?}", res); + let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8"; + assert!( + actual_error.contains(expected_error), + "did not find expected error '{}' in actual error '{}'", + actual_error, + expected_error + ); +} + +#[test] +fn utf8_to_date32() { + use chrono::NaiveDate; + let from_ymd = chrono::NaiveDate::from_ymd; + let since = chrono::NaiveDate::signed_duration_since; + + let a = StringArray::from(vec![ + "2000-01-01", // valid date with leading 0s + "2000-2-2", // valid date without leading 0s + "2000-00-00", // invalid month and day + "2000-01-01T12:00:00", // date + time is invalid + "2000", // just a year is invalid + ]); + let array = Arc::new(a) as ArrayRef; + let b = cast_with_options(&array, &DataType::Date32, CastOptions::default()).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + + // test valid inputs + let date_value = since(NaiveDate::from_ymd(2000, 1, 1), from_ymd(1970, 1, 1)) + .num_days() as i32; + assert_eq!(true, c.is_valid(0)); // "2000-01-01" + assert_eq!(date_value, c.value(0)); + + let date_value = since(NaiveDate::from_ymd(2000, 2, 2), from_ymd(1970, 1, 1)) + .num_days() as i32; + assert_eq!(true, c.is_valid(1)); // "2000-2-2" + assert_eq!(date_value, c.value(1)); + + // test invalid inputs + assert_eq!(false, c.is_valid(2)); // "2000-00-00" + assert_eq!(false, c.is_valid(3)); // "2000-01-01T12:00:00" + assert_eq!(false, c.is_valid(4)); // "2000" +} + +#[test] +fn utf8_to_date64() { + let a = StringArray::from(vec![ + "2000-01-01T12:00:00", // date + time valid + "2020-12-15T12:34:56", // date + time valid + "2020-2-2T12:34:56", // valid date time without leading 0s + "2000-00-00T12:00:00", // invalid month and day + "2000-01-01 12:00:00", // missing the 'T' + "2000-01-01", // just a date is invalid + ]); + let array = Arc::new(a) as ArrayRef; + let b = cast_with_options(&array, &DataType::Date64, CastOptions::default()).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + + // test valid inputs + assert_eq!(true, c.is_valid(0)); // "2000-01-01T12:00:00" + assert_eq!(946728000000, c.value(0)); + assert_eq!(true, c.is_valid(1)); // "2020-12-15T12:34:56" + assert_eq!(1608035696000, c.value(1)); + assert_eq!(true, c.is_valid(2)); // "2020-2-2T12:34:56" + assert_eq!(1580646896000, c.value(2)); + + // test invalid inputs + assert_eq!(false, c.is_valid(3)); // "2000-00-00T12:00:00" + assert_eq!(false, c.is_valid(4)); // "2000-01-01 12:00:00" + assert_eq!(false, c.is_valid(5)); // "2000-01-01" +} + +fn make_union_array() -> UnionArray { + let mut builder = UnionBuilder::new_dense(7); + builder.append::("a", 1).unwrap(); + builder.append::("b", 2).unwrap(); + builder.build().unwrap() +} +*/ diff --git a/tests/it/compute/mod.rs b/tests/it/compute/mod.rs new file mode 100644 index 00000000000..f64549ca027 --- /dev/null +++ b/tests/it/compute/mod.rs @@ -0,0 +1 @@ +mod cast; diff --git a/tests/it/main.rs b/tests/it/main.rs index 9cd03e6b78a..b1501a9f243 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -6,3 +6,5 @@ mod ffi; mod io; mod test_util; + +mod compute;