Skip to content

Commit

Permalink
feat(functions): add new function: map_pick
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxuanliang committed May 24, 2024
1 parent 0604f10 commit 87cc190
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 0 deletions.
37 changes: 37 additions & 0 deletions src/query/functions/src/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,4 +244,41 @@ pub fn register(registry: &mut FunctionRegistry) {
.any(|(k, _)| k == key)
},
);

// `map_pick` overload for an empty map literal combined with an empty array literal
// (`Map(Nothing)`, `Array(Nothing)`): there is nothing to pick from and nothing to pick,
// so the domain is left unrestricted and evaluation always yields the unit scalar that
// represents the empty map.
registry.register_2_arg_core::<EmptyMapType, EmptyArrayType, EmptyMapType, _, _>(
"map_pick",
|_, _, _| FunctionDomain::Full,
|_, _, _| Value::Scalar(()),
);

// `map_pick` overload for an empty map literal combined with a typed key array
// (e.g. `map_pick({}, ['d'])`): whatever keys are requested, an empty map has no entry
// to return, so evaluation ignores both arguments and always yields the empty map.
registry.register_2_arg_core::<EmptyMapType, ArrayType<GenericType<0>>, EmptyMapType, _, _>(
"map_pick",
|_, _, _| FunctionDomain::Full,
|_, _, _| Value::Scalar(()),
);

// `map_pick(map, keys)`: returns a map holding only those entries of `map` whose key is
// listed in `keys`. Entries are emitted in the order of `keys`; keys that are absent from
// `map` are silently skipped. Registered through the nullable passthrough helper, so the
// closure below only deals with the non-null `Map(T0, T1)` / `Array(T0)` case.
registry.register_passthrough_nullable_2_arg(
    "map_pick",
    // Every picked entry originates from the input map, so the input map's domain is a
    // valid bound for the output; the domain of the key array is not consulted.
    |_, domain1, _| FunctionDomain::Domain(domain1.clone()),
    vectorize_with_builder_2_arg::<
        MapType<GenericType<0>, GenericType<1>>,
        ArrayType<GenericType<0>>,
        MapType<GenericType<0>, GenericType<1>>,
    >(|map, keys, output_map, _| {
        for (idx, key) in keys.iter().enumerate() {
            // A key listed more than once must still yield a single entry, otherwise
            // the resulting map would contain duplicate keys. Pick lists are expected
            // to be short, so a scan over the already-visited prefix is sufficient.
            if keys.iter().take(idx).any(|seen| seen == key) {
                continue;
            }
            if let Some((k, v)) = map.iter().find(|(k, _)| k == &key) {
                // Write straight into the output builder instead of staging the row
                // in a temporary builder/column that is then copied over.
                output_map.put_item((k.clone(), v.clone()));
            }
        }

        // Close the current row: everything put since the previous commit forms one map.
        output_map.commit_row();
    }),
);
}
31 changes: 31 additions & 0 deletions src/query/functions/tests/it/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ fn test_map() {
test_map_size(file);
test_map_cat(file);
test_map_contains_key(file);
test_map_pick(file);
}

fn test_map_cat(file: &mut impl Write) {
Expand Down Expand Up @@ -278,3 +279,33 @@ fn test_map_size(file: &mut impl Write) {
&columns,
);
}

/// Golden-file cases for `map_pick`: constant-only expressions first, then a map that is
/// assembled per row from string columns (some of them nullable).
fn test_map_pick(file: &mut impl Write) {
    // Expressions built purely from literals; no input columns are required.
    let literal_cases = [
        "map_pick({'a':1,'b':2,'c':3}, [])",
        "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])",
        "map_pick({}, [])",
        "map_pick({}, ['d'])",
    ];
    for &expr in &literal_cases {
        run_ast(file, expr, &[]);
    }

    // `a_col`..`c_col` supply the map keys, `d_col`..`f_col` the (nullable) map values.
    let key_a = StringType::from_data(vec!["a", "b", "c"]);
    let key_b = StringType::from_data(vec!["d", "e", "f"]);
    let key_c = StringType::from_data(vec!["x", "y", "z"]);
    let value_d =
        StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]);
    let value_e =
        StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]);
    let value_f =
        StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]);

    let input_columns = [
        ("a_col", key_a),
        ("b_col", key_b),
        ("c_col", key_c),
        ("d_col", value_d),
        ("e_col", value_e),
        ("f_col", value_f),
    ];
    run_ast(
        file,
        "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])",
        &input_columns,
    );
}
Original file line number Diff line number Diff line change
Expand Up @@ -2443,6 +2443,9 @@ Functions overloads:
0 map_contains_key(Map(Nothing), T0) :: Boolean
1 map_contains_key(Map(T0, T1), T0) :: Boolean
2 map_contains_key(Map(T0, T1) NULL, T0 NULL) :: Boolean NULL
0 map_pick(Map(Nothing), Array(Nothing)) :: Map(Nothing)
1 map_pick(Map(T0, T1), Array(T0)) :: Map(T0, T1)
2 map_pick(Map(T0, T1) NULL, Array(T0) NULL) :: Map(T0, T1) NULL
0 map_keys(Map(Nothing)) :: Array(Nothing)
1 map_keys(Map(T0, T1)) :: Array(T0)
2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL
Expand Down
64 changes: 64 additions & 0 deletions src/query/functions/tests/it/scalars/testdata/map.txt
Original file line number Diff line number Diff line change
Expand Up @@ -617,3 +617,67 @@ evaluation (internal):
+--------+-----------------------------------------------------------------------------------------------------------------+


ast : map_pick({'a':1,'b':2,'c':3}, [])
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array())
checked expr : map_pick<T0=String, T1=UInt8><Map(T0, T1), Array(T0)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), CAST(array<>() AS Array(String)))
optimized expr : {}
output type : Map(String, UInt8)
output domain : {}
output : {}


ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b'))
checked expr : map_pick<T0=String, T1=UInt8><Map(T0, T1), Array(T0)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), array<T0=String><T0, T0>("a", "b"))
optimized expr : {"a":1_u8, "b":2_u8}
output type : Map(String, UInt8)
output domain : {[{"a"..="b"}], [{1..=2}]}
output : {'a':1, 'b':2}


ast : map_pick({}, [])
raw expr : map_pick(map(array(), array()), array())
checked expr : map_pick<Map(Nothing), Array(Nothing)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()), array<>())
optimized expr : {} :: Map(Nothing)
output type : Map(Nothing)
output domain : {}
output : {}


ast : map_pick({}, ['d'])
raw expr : map_pick(map(array(), array()), array('d'))
checked expr : map_pick<T0=String><Map(Nothing), Array(T0)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()), array<T0=String><T0>("d"))
optimized expr : {} :: Map(Nothing)
output type : Map(Nothing)
output domain : {}
output : {}


ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])
raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), array('a', 'b'))
checked expr : map_pick<T0=String, T1=String NULL><Map(T0, T1), Array(T0)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), array<T0=String><T0, T0>("a", "b"))
optimized expr : map_pick<T0=String, T1=String NULL><Map(T0, T1), Array(T0)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), ['a', 'b'])
evaluation:
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| | a_col | b_col | c_col | d_col | e_col | f_col | Output |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) |
| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} |
| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} |
| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
evaluation (internal):
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+


Original file line number Diff line number Diff line change
Expand Up @@ -167,5 +167,35 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2')
----
1

# Test map_pick function
query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3'])
----
{'k1':'v1','k3':NULL}

query
SELECT map_pick({}, ['k1', 'k2', 'k3'])
----
{}

query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, [])
----
{}

statement ok
CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null)

statement ok
INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null)

query
SELECT map_pick(col_str, ['k1', 'k3']), map_pick(col_str, []), map_pick(col_int, ['a', 'b', 'c'])
FROM map_pick_test
----
{'k1':'v1','k3':NULL} {} {'a':10,'b':20}
{} {} {}
{} {} NULL

statement ok
DROP DATABASE map_func_test

0 comments on commit 87cc190

Please sign in to comment.