Skip to content

Commit

Permalink
ARROW-11542: [Rust] fix validity bitmap buffer length count in json reader
Browse files Browse the repository at this point in the history

Closes apache#9436 from houqp/qp_json_read

Authored-by: Qingping Hou <[email protected]>
Signed-off-by: Andrew Lamb <[email protected]>
  • Loading branch information
houqp authored and GeorgeAp committed Jun 7, 2021
1 parent b13d18a commit 2d4509c
Showing 1 changed file with 81 additions and 2 deletions.
83 changes: 81 additions & 2 deletions rust/arrow/src/json/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,8 +959,14 @@ impl Decoder {
}
DataType::Struct(fields) => {
// extract list values, with non-lists converted to Value::Null
let len = rows.len();
let num_bytes = bit_util::ceil(len, 8);
let array_item_count = rows
.iter()
.map(|row| match row {
Value::Array(values) => values.len(),
_ => 1,
})
.sum();
let num_bytes = bit_util::ceil(array_item_count, 8);
let mut null_buffer = MutableBuffer::from_len_zeroed(num_bytes);
let mut struct_index = 0;
let rows: Vec<Value> = rows
Expand Down Expand Up @@ -2673,4 +2679,77 @@ mod tests {
assert_eq!(1, aa.value(3));
assert_eq!(5, aa.value(7));
}

#[test]
fn test_json_read_nested_list() {
    // Column `c1` is a nullable list whose items are themselves nullable
    // lists of nullable UTF-8 strings.
    let string_list = DataType::List(Box::new(Field::new("item", DataType::Utf8, true)));
    let nested_list = DataType::List(Box::new(Field::new("item", string_list, true)));
    let schema = Arc::new(Schema::new(vec![Field::new("c1", nested_list, true)]));

    // Three rows: an empty outer list, one holding eight inner lists and one
    // holding two inner lists.
    let mut json_rows = vec![
        Ok(serde_json::json!({ "c1": [] })),
        Ok(serde_json::json!({
            "c1": [["a", "b"], ["c"], ["e", "f"], ["g"], ["h"], ["i"], ["j"], ["k"]],
        })),
        Ok(serde_json::json!({ "c1": [["foo"], ["bar"]] })),
    ]
    .into_iter();

    let decoder = Decoder::new(schema, 1024, None);
    let maybe_batch = decoder.next_batch(&mut json_rows).unwrap();
    let batch = maybe_batch.unwrap();

    // Decoding must succeed and yield one column with one entry per input row.
    assert_eq!(batch.num_columns(), 1);
    assert_eq!(batch.num_rows(), 3);
}

#[test]
fn test_json_read_list_of_structs() {
    // Column `c1` is a nullable list of nullable structs with a single
    // nullable Int64 field `a`.
    let struct_type = DataType::Struct(vec![Field::new("a", DataType::Int64, true)]);
    let list_type = DataType::List(Box::new(Field::new("item", struct_type, true)));
    let schema = Arc::new(Schema::new(vec![Field::new("c1", list_type, true)]));

    // NOTE: the three rows below carry 1 + 6 + 2 = 9 struct elements in total.
    // A validity buffer sized from the row count alone would be
    // bit_util::ceil(3, 8) = 1 byte (8 bits), which is too small for 9
    // elements, so this input exercises the validity bit buffer length
    // calculation.
    let mut json_rows = vec![
        Ok(serde_json::json!({ "c1": [{"a": 1}] })),
        Ok(serde_json::json!({
            "c1": [{"a": 2}, {"a": 3}, {"a": 4}, {"a": 5}, {"a": 6}, {"a": 7}],
        })),
        Ok(serde_json::json!({ "c1": [{"a": 10}, {"a": 11}] })),
    ]
    .into_iter();

    let decoder = Decoder::new(schema, 1024, None);
    let maybe_batch = decoder.next_batch(&mut json_rows).unwrap();
    let batch = maybe_batch.unwrap();

    // Decoding must succeed and yield one column with one entry per input row.
    assert_eq!(batch.num_columns(), 1);
    assert_eq!(batch.num_rows(), 3);
}
}

0 comments on commit 2d4509c

Please sign in to comment.