Patch summary (text before the first "diff --git" header is not part of any hunk):
  * arrow/src/array/array.rs: a test's "entry" struct declared two fields both named
    "key"; the second one is renamed to "value".
  * arrow/src/json/reader.rs: adds a DataType::Binary arm in `impl Decoder` that maps
    each row to `row.get(field.name()).and_then(|value| value.as_str())` and collects
    the results into an array, plus the test `test_json_read_binary_structs`.
NOTE(review): this patch had been flattened onto one line with its `<...>` generic
arguments and leading whitespace stripped. The turbofish types (`StringArray`,
`BinaryArray`, `Vec<_>`) and the indentation below are reconstructed -- confirm them
against the upstream sources before applying.

diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 4702179c7839..5504c4ac0713 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -668,7 +668,7 @@ mod tests {
                 "entry",
                 DataType::Struct(vec![
                     Field::new("key", DataType::Utf8, false),
-                    Field::new("key", DataType::Int32, true),
+                    Field::new("value", DataType::Int32, true),
                 ]),
                 false,
             )),
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index 4912c5e4d487..9592b59b2732 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -1225,6 +1225,14 @@ impl Decoder {
                             })
                             .collect::<StringArray>(),
                     ) as ArrayRef),
+                    DataType::Binary => Ok(Arc::new(
+                        rows.iter()
+                            .map(|row| {
+                                let maybe_value = row.get(field.name());
+                                maybe_value.and_then(|value| value.as_str())
+                            })
+                            .collect::<BinaryArray>(),
+                    ) as ArrayRef),
                     DataType::List(ref list_field) => {
                         match list_field.data_type() {
                             DataType::Dictionary(ref key_ty, _) => {
@@ -3140,6 +3148,38 @@ mod tests {
         assert_eq!(batch.num_rows(), 3);
     }
 
+    #[test]
+    fn test_json_read_binary_structs() {
+        let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+        let decoder = Decoder::new(Arc::new(schema), 1024, None);
+        let batch = decoder
+            .next_batch(
+                &mut vec![
+                    Ok(serde_json::json!({
+                        "c1": "₁₂₃",
+                    })),
+                    Ok(serde_json::json!({
+                        "c1": "foo",
+                    })),
+                ]
+                .into_iter(),
+            )
+            .unwrap()
+            .unwrap();
+        let data = batch.columns().iter().collect::<Vec<_>>();
+
+        let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+        let binary_values = BinaryArray::from(vec!["₁₂₃".as_bytes(), "foo".as_bytes()]);
+        let expected_batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(binary_values)])
+                .unwrap();
+        let expected_data = expected_batch.columns().iter().collect::<Vec<_>>();
+
+        assert_eq!(data, expected_data);
+        assert_eq!(batch.num_columns(), 1);
+        assert_eq!(batch.num_rows(), 2);
+    }
+
     #[test]
     fn test_json_iterator() {
         let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(5);