Skip to content

Commit

Permalink
feat: support maps in MutableArrayData (#1379)
Browse files Browse the repository at this point in the history
Additionally, this allows the use fo `filter` on record batches and
arrays containing maps.
  • Loading branch information
helgikrs authored Mar 3, 2022
1 parent 4642b3e commit 97c1ca3
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 9 deletions.
175 changes: 166 additions & 9 deletions arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ fn build_extend(array: &ArrayData) -> Extend {
DataType::LargeUtf8 | DataType::LargeBinary => {
variable_size::build_extend::<i64>(array)
}
DataType::List(_) => list::build_extend::<i32>(array),
DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
DataType::LargeList(_) => list::build_extend::<i64>(array),
DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
DataType::Struct(_) => structure::build_extend(array),
Expand All @@ -276,7 +276,10 @@ fn build_extend(array: &ArrayData) -> Extend {
DataType::FixedSizeList(_, _) => {}
DataType::Union(_) => {}
*/
_ => todo!("Take and filter operations still not supported for this datatype"),
ty => todo!(
"Take and filter operations still not supported for this datatype: `{:?}`",
ty
),
}
}

Expand Down Expand Up @@ -307,7 +310,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
DataType::Interval(IntervalUnit::MonthDayNano) => primitive::extend_nulls::<i128>,
DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::<i32>,
DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::<i64>,
DataType::List(_) => list::extend_nulls::<i32>,
DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::<i32>,
DataType::LargeList(_) => list::extend_nulls::<i64>,
DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
DataType::UInt8 => primitive::extend_nulls::<u8>,
Expand All @@ -327,7 +330,10 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
DataType::FixedSizeList(_, _) => {}
DataType::Union(_) => {}
*/
_ => todo!("Take and filter operations still not supported for this datatype"),
ty => todo!(
"Take and filter operations still not supported for this datatype: `{:?}`",
ty
),
})
}

Expand Down Expand Up @@ -452,7 +458,7 @@ impl<'a> MutableArrayData<'a> {
| DataType::LargeBinary
| DataType::Interval(_)
| DataType::FixedSizeBinary(_) => vec![],
DataType::List(_) | DataType::LargeList(_) => {
DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) => {
let childs = arrays
.iter()
.map(|array| &array.child_data()[0])
Expand Down Expand Up @@ -516,8 +522,8 @@ impl<'a> MutableArrayData<'a> {
})
.collect::<Vec<_>>(),
},
_ => {
todo!("Take and filter operations still not supported for this datatype")
ty => {
todo!("Take and filter operations still not supported for this datatype: `{:?}`", ty)
}
};

Expand Down Expand Up @@ -683,8 +689,8 @@ mod tests {
array::{
Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray,
FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array,
Int64Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, StructArray, UInt8Array,
Int64Builder, ListBuilder, MapBuilder, NullArray, PrimitiveBuilder,
StringArray, StringDictionaryBuilder, StructArray, UInt8Array,
},
buffer::Buffer,
datatypes::Field,
Expand Down Expand Up @@ -1331,6 +1337,157 @@ mod tests {
Ok(())
}

#[test]
fn test_map_nulls_append() -> Result<()> {
let mut builder = MapBuilder::<Int64Builder, Int64Builder>::new(
None,
Int64Builder::new(32),
Int64Builder::new(32),
);
builder.keys().append_slice(&[1, 2, 3])?;
builder.values().append_slice(&[1, 2, 3])?;
builder.append(true)?;
builder.keys().append_slice(&[4, 5])?;
builder.values().append_slice(&[4, 5])?;
builder.append(true)?;
builder.append(false)?;
builder
.keys()
.append_slice(&[6, 7, 8, 100, 101, 9, 10, 11])?;
builder.values().append_slice(&[6, 7, 8])?;
builder.values().append_null()?;
builder.values().append_null()?;
builder.values().append_slice(&[9, 10, 11])?;
builder.append(true)?;

let a = builder.finish();
let a = a.data();

let mut builder = MapBuilder::<Int64Builder, Int64Builder>::new(
None,
Int64Builder::new(32),
Int64Builder::new(32),
);

builder.keys().append_slice(&[12, 13])?;
builder.values().append_slice(&[12, 13])?;
builder.append(true)?;
builder.append(false)?;
builder.append(true)?;
builder.keys().append_slice(&[100, 101, 14, 15])?;
builder.values().append_null()?;
builder.values().append_null()?;
builder.values().append_slice(&[14, 15])?;
builder.append(true)?;

let b = builder.finish();
let b = b.data();
let c = b.slice(1, 2);
let d = b.slice(2, 2);

let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10);

mutable.extend(0, 0, a.len());
mutable.extend(1, 0, b.len());
mutable.extend(2, 0, c.len());
mutable.extend(3, 0, d.len());
let result = mutable.freeze();

let expected_key_array = Int64Array::from(vec![
Some(1),
Some(2),
Some(3),
Some(4),
Some(5),
Some(6),
Some(7),
Some(8),
Some(100),
Some(101),
Some(9),
Some(10),
Some(11),
// second array
Some(12),
Some(13),
Some(100),
Some(101),
Some(14),
Some(15),
// slice(1, 2) results in no values added
Some(100),
Some(101),
Some(14),
Some(15),
]);

let expected_value_array = Int64Array::from(vec![
Some(1),
Some(2),
Some(3),
Some(4),
Some(5),
Some(6),
Some(7),
Some(8),
None,
None,
Some(9),
Some(10),
Some(11),
// second array
Some(12),
Some(13),
None,
None,
Some(14),
Some(15),
// slice(1, 2) results in no values added
None,
None,
Some(14),
Some(15),
]);

let expected_entry_array = StructArray::from(vec![
(
Field::new("keys", DataType::Int64, false),
Arc::new(expected_key_array) as ArrayRef,
),
(
Field::new("values", DataType::Int64, true),
Arc::new(expected_value_array) as ArrayRef,
),
]);

let map_offsets =
Buffer::from_slice_ref(&[0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23]);

let expected_list_data = ArrayData::try_new(
DataType::Map(
Box::new(Field::new(
"entries",
DataType::Struct(vec![
Field::new("keys", DataType::Int64, false),
Field::new("values", DataType::Int64, true),
]),
false,
)),
false,
),
12,
None,
Some(Buffer::from(&[0b11011011, 0b1110])),
0,
vec![map_offsets],
vec![expected_entry_array.data().clone()],
)
.unwrap();
assert_eq!(result, expected_list_data);

Ok(())
}

#[test]
fn test_list_of_strings_append() -> Result<()> {
// [["alpha", "beta", None]]
Expand Down
37 changes: 37 additions & 0 deletions arrow/src/compute/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1484,4 +1484,41 @@ mod tests {
assert_eq!(actual, expected_strings);
}
}

#[test]
fn test_filter_map() {
let mut builder =
MapBuilder::new(None, StringBuilder::new(16), Int64Builder::new(4));
// [{"key1": 1}, {"key2": 2, "key3": 3}, null, {"key1": 1}
builder.keys().append_value("key1").unwrap();
builder.values().append_value(1).unwrap();
builder.append(true).unwrap();
builder.keys().append_value("key2").unwrap();
builder.keys().append_value("key3").unwrap();
builder.values().append_value(2).unwrap();
builder.values().append_value(3).unwrap();
builder.append(true).unwrap();
builder.append(false).unwrap();
builder.keys().append_value("key1").unwrap();
builder.values().append_value(1).unwrap();
builder.append(true).unwrap();
let maparray = Arc::new(builder.finish()) as ArrayRef;

let indices = vec![Some(true), Some(false), Some(false), Some(true)]
.into_iter()
.collect::<BooleanArray>();
let got = filter(&maparray, &indices).unwrap();

let mut builder =
MapBuilder::new(None, StringBuilder::new(8), Int64Builder::new(2));
builder.keys().append_value("key1").unwrap();
builder.values().append_value(1).unwrap();
builder.append(true).unwrap();
builder.keys().append_value("key1").unwrap();
builder.values().append_value(1).unwrap();
builder.append(true).unwrap();
let expected = Arc::new(builder.finish()) as ArrayRef;

assert_eq!(&expected, &got);
}
}

0 comments on commit 97c1ca3

Please sign in to comment.