From e0a9b5e5a3d9adb20995151ea0d8eff5ce997bf7 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sat, 30 Jul 2022 15:39:23 +0000 Subject: [PATCH] Improved testing --- tests/it/io/orc/read.rs | 105 ++++++++++++++------------------------- tests/it/io/orc/write.py | 10 ++-- 2 files changed, 41 insertions(+), 74 deletions(-) diff --git a/tests/it/io/orc/read.rs b/tests/it/io/orc/read.rs index 06a0c18f329..9d6ccaee4ff 100644 --- a/tests/it/io/orc/read.rs +++ b/tests/it/io/orc/read.rs @@ -13,25 +13,42 @@ fn infer() -> Result<(), Error> { Ok(()) } -#[test] -fn float32() -> Result<(), Error> { +fn deserialize_column(column_name: &str) -> Result, Error> { let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); let metadata = format::read::read_metadata(&mut reader)?; + let schema = read::infer_schema(&metadata.footer)?; + let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 1, vec![])?; + let (pos, field) = schema + .fields + .iter() + .enumerate() + .find(|f| f.1.name == column_name) + .unwrap(); + + let data_type = field.data_type.clone(); + let column = format::read::read_stripe_column( + &mut reader, + &metadata, + 0, + footer, + 1 + pos as u32, + vec![], + )?; + + read::deserialize(data_type, &column) +} +#[test] +fn float32() -> Result<(), Error> { assert_eq!( - read::deserialize(DataType::Float32, &column)?, + deserialize_column("float_nullable")?, Float32Array::from([Some(1.0), Some(2.0), None, Some(4.0), Some(5.0)]).boxed() ); - let (footer, scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 2, scratch)?; - assert_eq!( - read::deserialize(DataType::Float32, &column)?, + deserialize_column("float_required")?, Float32Array::from([Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]).boxed() ); Ok(()) @@ -39,23 +56,13 @@ fn float32() -> Result<(), Error> { #[test] fn float64() -> Result<(), Error> { - let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); - let metadata = format::read::read_metadata(&mut reader)?; - let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 7, vec![])?; - assert_eq!( - read::deserialize(DataType::Float64, &column)?, + deserialize_column("double_nullable")?, Float64Array::from([Some(1.0), Some(2.0), None, Some(4.0), Some(5.0)]).boxed() ); - let (footer, scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 8, scratch)?; - assert_eq!( - read::deserialize(DataType::Float64, &column)?, + deserialize_column("double_required")?, Float64Array::from([Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]).boxed() ); Ok(()) @@ -63,23 +70,13 @@ fn float64() -> Result<(), Error> { #[test] fn boolean() -> Result<(), Error> { - let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); - let metadata = format::read::read_metadata(&mut reader)?; - let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 3, vec![])?; - assert_eq!( - read::deserialize(DataType::Boolean, &column)?, + deserialize_column("bool_nullable")?, BooleanArray::from([Some(true), Some(false), None, Some(true), Some(false)]).boxed() ); - let (footer, scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 4, scratch)?; - assert_eq!( - read::deserialize(DataType::Boolean, &column)?, + deserialize_column("bool_required")?, BooleanArray::from([Some(true), Some(false), Some(true), Some(true), Some(false)]).boxed() ); Ok(()) @@ -87,23 +84,13 @@ fn boolean() -> Result<(), Error> { #[test] fn int() -> Result<(), Error> { - let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); - let metadata = format::read::read_metadata(&mut reader)?; - let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 6, vec![])?; - assert_eq!( - read::deserialize(DataType::Int32, &column)?, + deserialize_column("int_required")?, Int32Array::from([Some(5), Some(-5), Some(1), Some(5), Some(5)]).boxed() ); - let (footer, _scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 5, vec![])?; - assert_eq!( - read::deserialize(DataType::Int32, &column)?, + deserialize_column("int_nullable")?, Int32Array::from([Some(5), Some(-5), None, Some(5), Some(5)]).boxed() ); Ok(()) @@ -111,23 +98,13 @@ fn int() -> Result<(), Error> { #[test] fn bigint() -> Result<(), Error> { - let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); - let metadata = format::read::read_metadata(&mut reader)?; - let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 10, vec![])?; - assert_eq!( - read::deserialize(DataType::Int64, &column)?, + deserialize_column("bigint_required")?, Int64Array::from([Some(5), Some(-5), Some(1), Some(5), Some(5)]).boxed() ); - let (footer, scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 9, scratch)?; - assert_eq!( - read::deserialize(DataType::Int64, &column)?, + deserialize_column("bigint_nullable")?, Int64Array::from([Some(5), Some(-5), None, Some(5), Some(5)]).boxed() ); Ok(()) @@ -135,23 +112,13 @@ fn bigint() -> Result<(), Error> { #[test] fn utf8() -> Result<(), Error> { - let mut reader = std::fs::File::open("fixtures/pyorc/test.orc").unwrap(); - let metadata = format::read::read_metadata(&mut reader)?; - let footer = format::read::read_stripe_footer(&mut reader, &metadata, 0, &mut vec![])?; - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 11, vec![])?; - assert_eq!( - read::deserialize(DataType::Utf8, &column)?, + deserialize_column("utf8_required")?, Utf8Array::::from_slice(["a", "bb", "ccc", "dddd", "eeeee"]).boxed() ); - let (footer, _scratch) = column.into_inner(); - - let column = format::read::read_stripe_column(&mut reader, &metadata, 0, footer, 12, vec![])?; - assert_eq!( - read::deserialize(DataType::Utf8, &column)?, + deserialize_column("utf8_nullable")?, Utf8Array::::from([Some("a"), Some("bb"), None, Some("dddd"), Some("eeeee")]).boxed() ); Ok(()) diff --git a/tests/it/io/orc/write.py b/tests/it/io/orc/write.py index 25c3f467d22..2593b823eee 100644 --- a/tests/it/io/orc/write.py +++ b/tests/it/io/orc/write.py @@ -8,14 +8,14 @@ "float_required": [1.0, 2.0, 3.0, 4.0, 5.0], "bool_nullable": [True, False, None, True, False], "bool_required": [True, False, True, True, False], - "int_nulable": [5, -5, None, 5, 5], + "int_nullable": [5, -5, None, 5, 5], "int_required": [5, -5, 1, 5, 5], - "double_nulable": [1.0, 2.0, None, 4.0, 5.0], + "double_nullable": [1.0, 2.0, None, 4.0, 5.0], "double_required": [1.0, 2.0, 3.0, 4.0, 5.0], - "bigint_nulable": [5, -5, None, 5, 5], + "bigint_nullable": [5, -5, None, 5, 5], "bigint_required": [5, -5, 1, 5, 5], - "utf8_nulable": ["a", "bb", "ccc", "dddd", "eeeee"], - "utf8_required": ["a", "bb", None, "dddd", "eeeee"], + "utf8_required": ["a", "bb", "ccc", "dddd", "eeeee"], + "utf8_nullable": ["a", "bb", None, "dddd", "eeeee"], } def infer_schema(data):