From 6c3aa4b08911fbe26dbdd5814ad787031f37f971 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Tue, 24 Aug 2021 22:50:49 +0000
Subject: [PATCH] Added support for reading binary from CSV

---
 src/io/csv/read/deserialize.rs |  7 +++++++
 tests/it/io/csv/read.rs        | 12 ++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/src/io/csv/read/deserialize.rs b/src/io/csv/read/deserialize.rs
index 272b042122e..57e43f29a65 100644
--- a/src/io/csv/read/deserialize.rs
+++ b/src/io/csv/read/deserialize.rs
@@ -58,6 +58,11 @@ fn deserialize_utf8<O: Offset>(rows: &[ByteRecord], column: usize) -> Arc<dyn Ar
     Arc::new(Utf8Array::<O>::from_trusted_len_iter(iter))
 }
 
+fn deserialize_binary<O: Offset>(rows: &[ByteRecord], column: usize) -> Arc<dyn Array> {
+    let iter = rows.iter().map(|row| row.get(column));
+    Arc::new(BinaryArray::<O>::from_trusted_len_iter(iter))
+}
+
 pub fn deserialize_column(
     rows: &[ByteRecord],
     column: usize,
@@ -151,6 +156,8 @@ pub fn deserialize_column(
         }
         Utf8 => deserialize_utf8::<i32>(rows, column),
         LargeUtf8 => deserialize_utf8::<i64>(rows, column),
+        Binary => deserialize_binary::<i32>(rows, column),
+        LargeBinary => deserialize_binary::<i64>(rows, column),
         other => {
             return Err(ArrowError::NotYetImplemented(format!(
                 "Deserializing type \"{:?}\" is not implemented",
diff --git a/tests/it/io/csv/read.rs b/tests/it/io/csv/read.rs
index bdba648e038..e0c13a28f50 100644
--- a/tests/it/io/csv/read.rs
+++ b/tests/it/io/csv/read.rs
@@ -173,3 +173,15 @@ fn float32() -> Result<()> {
     assert_eq!(expected, result.as_ref());
     Ok(())
 }
+
+#[test]
+fn binary() -> Result<()> {
+    let input = vec!["aa", "bb"];
+    let input = input.join("\n");
+
+    let expected = BinaryArray::<i32>::from([Some(b"aa"), Some(b"bb")]);
+
+    let result = test_deserialize(&input, DataType::Binary)?;
+    assert_eq!(expected, result.as_ref());
+    Ok(())
+}