From dd5c15134ef192641188e799bf1a040bfada25f5 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 5 Feb 2022 18:14:48 +0100 Subject: [PATCH] fix unescaped '"' in json writing (#812) --- src/io/json/write/serialize.rs | 19 ++----------------- tests/it/io/json/write.rs | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 28bf0bd7563..b74e1c36302 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -1,5 +1,4 @@ use lexical_core::ToLexical; -use serde_json::Value; use streaming_iterator::StreamingIterator; use crate::bitmap::utils::zip_validity; @@ -47,7 +46,7 @@ fn utf8_serializer<'a, O: Offset>( array.iter(), |x, buf| { if let Some(x) = x { - utf8_serialize(x, buf) + serde_json::to_writer(buf, x).unwrap(); } else { buf.extend_from_slice(b"null") } @@ -137,20 +136,6 @@ fn list_serializer<'a, O: Offset>( )) } -#[inline] -fn utf8_serialize(value: &str, buf: &mut Vec) { - if value.as_bytes().is_ascii() { - buf.reserve(value.len() + 2); - buf.push(b'"'); - buf.extend_from_slice(value.as_bytes()); - buf.push(b'"'); - } else { - // it may contain reserved keywords: perform roundtrip for - // todo: avoid this roundtrip over serde_json - serde_json::to_writer(buf, &Value::String(value.to_string())).unwrap(); - } -} - fn new_serializer<'a>( array: &'a dyn Array, ) -> Box + 'a + Send + Sync> { @@ -189,7 +174,7 @@ fn serialize_item( buffer.push(b','); } first_item = false; - utf8_serialize(key, buffer); + serde_json::to_writer(&mut *buffer, key).unwrap(); buffer.push(b':'); buffer.extend(*value); } diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index 51997c1d367..dc10fbff724 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -301,7 +301,26 @@ fn write_escaped_utf8() -> Result<()> { assert_eq!( String::from_utf8(buf).unwrap().as_bytes(), - b"{\"c1\":\"a\na\"}\n{\"c1\":null}\n" + b"{\"c1\":\"a\\na\"}\n{\"c1\":null}\n" + ); + Ok(()) +} + +#[test] +fn write_quotation_marks_in_utf8() -> Result<()> { + let a = Utf8Array::::from(&vec![Some("a\"a"), None]); + + let batch = Chunk::try_new(vec![&a as &dyn Array]).unwrap(); + + let buf = write_batch( + batch, + vec!["c1".to_string()], + json_write::LineDelimited::default(), + )?; + + assert_eq!( + String::from_utf8(buf).unwrap().as_bytes(), + b"{\"c1\":\"a\\\"a\"}\n{\"c1\":null}\n" ); Ok(()) }