Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
feat: support serialization of dictionary
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 28, 2023
1 parent d06323a commit db53304
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
32 changes: 31 additions & 1 deletion src/io/json/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io::Write;
use streaming_iterator::StreamingIterator;

use crate::bitmap::utils::ZipValidity;
use crate::datatypes::TimeUnit;
use crate::datatypes::{IntegerType, TimeUnit};
use crate::io::iterator::BufStreamingIterator;
use crate::offset::Offset;
use crate::temporal_conversions::{
Expand Down Expand Up @@ -69,6 +69,24 @@ where
))
}

/// Creates a streaming serializer that emits one JSON value per slot of a
/// dictionary-encoded array, viewing the dictionary values as `Utf8Array<O>`.
///
/// Valid slots are written through `utf8::write_str`; null slots are written
/// as the literal `null`.
///
/// # Panics
///
/// Panics if `iter_typed::<Utf8Array<O>>()` returns an error (presumably when
/// the dictionary values are not a `Utf8Array<O>` — confirm against
/// `DictionaryArray::iter_typed`), or if `utf8::write_str` reports a failure.
fn dictionary_utf8_serializer<'a, K: DictionaryKey, O: Offset>(
    array: &'a DictionaryArray<K>,
) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync> {
    // Typed view over the dictionary: each item is an optional string.
    let entries = array.iter_typed::<Utf8Array<O>>().unwrap();

    let serializer = BufStreamingIterator::new(
        entries,
        |entry, buf| match entry {
            Some(text) => {
                utf8::write_str(buf, text).unwrap();
            }
            None => buf.extend_from_slice(b"null"),
        },
        Vec::new(),
    );

    Box::new(serializer)
}

fn utf8_serializer<'a, O: Offset>(
array: &'a Utf8Array<O>,
) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync> {
Expand Down Expand Up @@ -257,6 +275,18 @@ pub(crate) fn new_serializer<'a>(
}
DataType::List(_) => list_serializer::<i32>(array.as_any().downcast_ref().unwrap()),
DataType::LargeList(_) => list_serializer::<i64>(array.as_any().downcast_ref().unwrap()),
other @ DataType::Dictionary(k, v, _) => match (k, &**v) {
(IntegerType::UInt32, DataType::LargeUtf8) => {
let array = array
.as_any()
.downcast_ref::<DictionaryArray<u32>>()
.unwrap();
dictionary_utf8_serializer::<u32, i64>(array)
}
_ => {
todo!("Writing {:?} to JSON", other)
}
},
DataType::Date32 => date_serializer(array.as_any().downcast_ref().unwrap(), date32_to_date),
DataType::Date64 => date_serializer(array.as_any().downcast_ref().unwrap(), date64_to_date),
DataType::Timestamp(tu, tz) => {
Expand Down
17 changes: 17 additions & 0 deletions tests/it/io/json/write.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use arrow2::datatypes::IntegerType;
use arrow2::{
array::*,
bitmap::Bitmap,
Expand Down Expand Up @@ -52,6 +53,22 @@ fn utf8() -> Result<()> {
test!(array, expected)
}

#[test]
/// Serializes a `u32`-keyed dictionary over `LargeUtf8` values and checks the
/// resulting JSON array of strings.
fn dictionary_utf8() -> Result<()> {
    // Logical type of the dictionary: u32 keys pointing into large-utf8 values.
    let data_type =
        DataType::Dictionary(IntegerType::UInt32, Box::new(DataType::LargeUtf8), false);

    // Four distinct values; the last key (1) points back at "b".
    let dictionary = Utf8Array::<i64>::from([Some("a"), Some("b"), Some("c"), Some("d")]);
    let indices = PrimitiveArray::from_slice([0u32, 1, 2, 3, 1]);

    let array = DictionaryArray::try_new(data_type, indices, Box::new(dictionary)).unwrap();

    // Keys are resolved to their values in key order.
    let expected = r#"["a","b","c","d","b"]"#;

    test!(array, expected)
}

#[test]
fn struct_() -> Result<()> {
let c1 = Int32Array::from([Some(1), Some(2), Some(3), None, Some(5)]);
Expand Down

0 comments on commit db53304

Please sign in to comment.