From 53a313db26761ad04d3df6dfdba6ceb7c4197af9 Mon Sep 17 00:00:00 2001 From: roberto Date: Sun, 1 May 2022 04:04:25 +0200 Subject: [PATCH] MOD error on empty reader --- src/io/ndjson/read/deserialize.rs | 7 +++++++ src/io/ndjson/read/file.rs | 6 ++++++ tests/it/io/ndjson/read.rs | 15 ++++++++------- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/io/ndjson/read/deserialize.rs b/src/io/ndjson/read/deserialize.rs index 8364fec15db..e02927d0248 100644 --- a/src/io/ndjson/read/deserialize.rs +++ b/src/io/ndjson/read/deserialize.rs @@ -15,6 +15,13 @@ use super::super::super::json::read::_deserialize; /// # Errors /// This function errors iff any of the rows is not a valid JSON (i.e. the format is not valid NDJSON). pub fn deserialize(rows: &[String], data_type: DataType) -> Result<Arc<dyn Array>, ArrowError> { + if rows.is_empty() { + return Err(ArrowError::ExternalFormat( + "Cannot deserialize 0 NDJSON rows because empty string is not a valid JSON value" + .to_string(), + )); + } + // deserialize strings to `Value`s let rows = rows .iter() diff --git a/src/io/ndjson/read/file.rs b/src/io/ndjson/read/file.rs index 9ec9a73a8c6..cfb49bf6bc8 100644 --- a/src/io/ndjson/read/file.rs +++ b/src/io/ndjson/read/file.rs @@ -104,6 +104,12 @@ pub fn infer<R: std::io::BufRead>( reader: &mut R, number_of_rows: Option<usize>, ) -> Result<DataType> { + if !reader.fill_buf().map(|b| !b.is_empty())? 
{ + return Err(ArrowError::ExternalFormat( + "Cannot infer NDJSON types on empty reader because empty string is not a valid JSON value".to_string(), + )); + } + let rows = vec!["".to_string(); 1]; // 1 <=> read row by row let mut reader = FileReader::new(reader, rows, number_of_rows); diff --git a/tests/it/io/ndjson/read.rs b/tests/it/io/ndjson/read.rs index ecea5d52431..4b74248c569 100644 --- a/tests/it/io/ndjson/read.rs +++ b/tests/it/io/ndjson/read.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use arrow2::array::*; use arrow2::datatypes::{DataType, Field}; -use arrow2::error::Result; +use arrow2::error::{ArrowError, Result}; use arrow2::io::ndjson::read as ndjson_read; use arrow2::io::ndjson::read::FallibleStreamingIterator; @@ -74,14 +74,15 @@ fn read_null() -> Result<()> { #[test] fn read_empty_reader() -> Result<()> { let ndjson = ""; - let expected_data_type = DataType::Null; - let data_type = infer(ndjson)?; - assert_eq!(expected_data_type, data_type); + let infer_error = infer(ndjson); + assert!(matches!(infer_error, Err(ArrowError::ExternalFormat(_)))); - let arrays = read_and_deserialize(ndjson, &data_type, 1000)?; - let expected: Vec<Arc<dyn Array>> = vec![]; - assert_eq!(expected, arrays); + let deserialize_error = ndjson_read::deserialize(&[], DataType::Null); + assert!(matches!( + deserialize_error, + Err(ArrowError::ExternalFormat(_)) + )); Ok(()) }