diff --git a/src/io/json/write/mod.rs b/src/io/json/write/mod.rs index fcf1c435148..dc2c69944ad 100644 --- a/src/io/json/write/mod.rs +++ b/src/io/json/write/mod.rs @@ -1,6 +1,7 @@ //! APIs to write to JSON mod format; mod serialize; +mod utf8; pub use fallible_streaming_iterator::*; pub use format::*; pub use serialize::serialize; diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 28bf0bd7563..d67ef2b36de 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -1,5 +1,4 @@ use lexical_core::ToLexical; -use serde_json::Value; use streaming_iterator::StreamingIterator; use crate::bitmap::utils::zip_validity; @@ -8,6 +7,7 @@ use crate::io::iterator::BufStreamingIterator; use crate::util::lexical_to_bytes_mut; use crate::{array::*, datatypes::DataType, types::NativeType}; +use super::utf8::utf8_serialize; use super::{JsonArray, JsonFormat}; fn boolean_serializer<'a>( @@ -137,20 +137,6 @@ fn list_serializer<'a, O: Offset>( )) } -#[inline] -fn utf8_serialize(value: &str, buf: &mut Vec) { - if value.as_bytes().is_ascii() { - buf.reserve(value.len() + 2); - buf.push(b'"'); - buf.extend_from_slice(value.as_bytes()); - buf.push(b'"'); - } else { - // it may contain reserved keywords: perform roundtrip for - // todo: avoid this roundtrip over serde_json - serde_json::to_writer(buf, &Value::String(value.to_string())).unwrap(); - } -} - fn new_serializer<'a>( array: &'a dyn Array, ) -> Box + 'a + Send + Sync> { diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index 51997c1d367..dc10fbff724 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -301,7 +301,26 @@ fn write_escaped_utf8() -> Result<()> { assert_eq!( String::from_utf8(buf).unwrap().as_bytes(), - b"{\"c1\":\"a\na\"}\n{\"c1\":null}\n" + b"{\"c1\":\"a\\na\"}\n{\"c1\":null}\n" + ); + Ok(()) +} + +#[test] +fn write_quotation_marks_in_utf8() -> Result<()> { + let a = Utf8Array::::from(&vec![Some("a\"a"), None]); + + let batch = Chunk::try_new(vec![&a as &dyn Array]).unwrap(); + + let buf = write_batch( + batch, + vec!["c1".to_string()], + json_write::LineDelimited::default(), + )?; + + assert_eq!( + String::from_utf8(buf).unwrap().as_bytes(), + b"{\"c1\":\"a\\\"a\"}\n{\"c1\":null}\n" ); Ok(()) }