Skip to content

Commit

Permalink
fix unescaped '"' in json writing (jorgecarleitao#812)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 6, 2022
1 parent b46a636 commit f133601
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 18 deletions.
19 changes: 2 additions & 17 deletions src/io/json/write/serialize.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use lexical_core::ToLexical;
use serde_json::Value;
use streaming_iterator::StreamingIterator;

use crate::bitmap::utils::zip_validity;
Expand Down Expand Up @@ -47,7 +46,7 @@ fn utf8_serializer<'a, O: Offset>(
array.iter(),
|x, buf| {
if let Some(x) = x {
utf8_serialize(x, buf)
serde_json::to_writer(buf, x).unwrap();
} else {
buf.extend_from_slice(b"null")
}
Expand Down Expand Up @@ -137,20 +136,6 @@ fn list_serializer<'a, O: Offset>(
))
}

/// Appends `value` to `buf` as a JSON string literal, including the
/// surrounding double quotes.
///
/// Bytes that may not appear raw inside a JSON string are escaped:
/// `"` and `\` get a backslash prefix, the common control characters use
/// their short forms (`\b`, `\t`, `\n`, `\f`, `\r`) and every other byte
/// below `0x20` is written as `\u00xx` with lowercase hex digits. All other
/// bytes, including multi-byte UTF-8 sequences, are copied through unchanged.
///
/// Escaping is decided per byte rather than by an "is ASCII" test, because
/// every character that needs escaping is itself ASCII.
#[inline]
fn utf8_serialize(value: &str, buf: &mut Vec<u8>) {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let bytes = value.as_bytes();
    // Lower bound only: escapes make the output longer than the input.
    buf.reserve(bytes.len() + 2);
    buf.push(b'"');

    // `start` marks the beginning of the pending run of bytes that need no
    // escaping; the run is flushed in one copy whenever an escape is hit.
    let mut start = 0;
    for (i, &byte) in bytes.iter().enumerate() {
        let escape: &[u8] = match byte {
            b'"' => b"\\\"",
            b'\\' => b"\\\\",
            0x08 => b"\\b",
            b'\t' => b"\\t",
            b'\n' => b"\\n",
            0x0C => b"\\f",
            b'\r' => b"\\r",
            0x00..=0x1F => {
                // Remaining control characters have no short form.
                buf.extend_from_slice(&bytes[start..i]);
                buf.extend_from_slice(b"\\u00");
                buf.push(HEX[usize::from(byte >> 4)]);
                buf.push(HEX[usize::from(byte & 0x0F)]);
                start = i + 1;
                continue;
            }
            // Bytes >= 0x80 belong to multi-byte UTF-8 sequences and are
            // valid inside a JSON string as-is.
            _ => continue,
        };
        buf.extend_from_slice(&bytes[start..i]);
        buf.extend_from_slice(escape);
        start = i + 1;
    }

    buf.extend_from_slice(&bytes[start..]);
    buf.push(b'"');
}

fn new_serializer<'a>(
array: &'a dyn Array,
) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync> {
Expand Down Expand Up @@ -189,7 +174,7 @@ fn serialize_item<F: JsonFormat>(
buffer.push(b',');
}
first_item = false;
utf8_serialize(key, buffer);
serde_json::to_writer(&mut *buffer, key).unwrap();
buffer.push(b':');
buffer.extend(*value);
}
Expand Down
21 changes: 20 additions & 1 deletion tests/it/io/json/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,26 @@ fn write_escaped_utf8() -> Result<()> {

assert_eq!(
String::from_utf8(buf).unwrap().as_bytes(),
b"{\"c1\":\"a\na\"}\n{\"c1\":null}\n"
b"{\"c1\":\"a\\na\"}\n{\"c1\":null}\n"
);
Ok(())
}

/// A double quote inside a UTF-8 value must come out backslash-escaped in
/// line-delimited JSON, while a null slot is written as `null`.
#[test]
fn write_quotation_marks_in_utf8() -> Result<()> {
    let values = Utf8Array::<i32>::from(&vec![Some("a\"a"), None]);
    let columns = vec![&values as &dyn Array];
    let names = vec!["c1".to_string()];

    let written = write_batch(
        Chunk::try_new(columns).unwrap(),
        names,
        json_write::LineDelimited::default(),
    )?;

    // One JSON object per row, each terminated by a newline.
    let expected: &[u8] = b"{\"c1\":\"a\\\"a\"}\n{\"c1\":null}\n";
    assert_eq!(String::from_utf8(written).unwrap().as_bytes(), expected);
    Ok(())
}

0 comments on commit f133601

Please sign in to comment.