From a8c38147aff4d1f74e842743d8e4ee27f21b855d Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sat, 18 May 2024 19:43:11 +0800 Subject: [PATCH 1/8] feat(functions): add new function: map_pick --- src/query/functions/src/scalars/map.rs | 37 +++++++++++ src/query/functions/tests/it/scalars/map.rs | 31 +++++++++ .../it/scalars/testdata/function_list.txt | 3 + .../tests/it/scalars/testdata/map.txt | 64 +++++++++++++++++++ .../query/functions/02_0074_function_map.test | 30 +++++++++ 5 files changed, 165 insertions(+) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index be3c4f5c74fd8..b2ac194c63992 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -244,4 +244,41 @@ pub fn register(registry: &mut FunctionRegistry) { .any(|(k, _)| k == key) }, ); + + registry.register_2_arg_core::( + "map_pick", + |_, _, _| FunctionDomain::Full, + |_, _, _| Value::Scalar(()), + ); + + registry.register_2_arg_core::>, EmptyMapType, _, _>( + "map_pick", + |_, _, _| FunctionDomain::Full, + |_, _, _| Value::Scalar(()), + ); + + registry.register_passthrough_nullable_2_arg( + "map_pick", + |_, domain1, domain2| { + FunctionDomain::Domain(match (domain1, domain2) { + (Some(domain1), _) => Some(domain1).cloned(), + (None, _) => None, + }) + }, + vectorize_with_builder_2_arg::< + MapType, GenericType<1>>, + ArrayType>, + MapType, GenericType<1>>, + >(|map, keys, output_map, ctx| { + let mut picked_map_builder = ArrayType::create_builder(keys.len(), ctx.generics); + for key in keys.iter() { + if let Some((k, v)) = map.iter().find(|(k, _)| k == &key) { + picked_map_builder.put_item((k.clone(), v.clone())); + } + } + + picked_map_builder.commit_row(); + output_map.append_column(&picked_map_builder.build()); + }), + ); } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 909bc3ddb8fe6..df91c05b5aa41 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -32,6 +32,7 @@ fn test_map() { test_map_size(file); test_map_cat(file); test_map_contains_key(file); + test_map_pick(file); } fn test_map_cat(file: &mut impl Write) { @@ -278,3 +279,33 @@ fn test_map_size(file: &mut impl Write) { &columns, ); } + +fn test_map_pick(file: &mut impl Write) { + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); + run_ast(file, "map_pick({}, [])", &[]); + run_ast(file, "map_pick({}, ['d'])", &[]); + + let columns = [ + ("a_col", StringType::from_data(vec!["a", "b", "c"])), + ("b_col", StringType::from_data(vec!["d", "e", "f"])), + ("c_col", StringType::from_data(vec!["x", "y", "z"])), + ( + "d_col", + StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]), + ), + ( + "e_col", + StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]), + ), + ( + "f_col", + StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]), + ), + ]; + run_ast( + file, + "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])", + &columns, + ); +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index fd840cdff97f6..762392bbce698 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2443,6 +2443,9 @@ Functions overloads: 0 map_contains_key(Map(Nothing), T0) :: Boolean 1 map_contains_key(Map(T0, T1), T0) :: Boolean 2 map_contains_key(Map(T0, T1) NULL, T0 NULL) :: Boolean NULL +0 map_pick(Map(Nothing), Array(Nothing)) :: Map(Nothing) +1 map_pick(Map(T0, T1), Array(T0)) :: Map(T0, T1) +2 map_pick(Map(T0, T1) NULL, Array(T0) NULL) :: Map(T0, T1) NULL 0 map_keys(Map(Nothing)) :: Array(Nothing) 1 map_keys(Map(T0, T1)) :: Array(T0) 2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 7149c7a324dfd..65dc4e9418294 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -617,3 +617,67 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ +ast : map_pick({'a':1,'b':2,'c':3}, []) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array()) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), CAST(array<>() AS Array(String))) +optimized expr : {} +output type : Map(String, UInt8) +output domain : {} +output : {} + + +ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b']) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b')) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array("a", "b")) +optimized expr : {"a":1_u8, "b":2_u8} +output type : Map(String, UInt8) +output domain : {[{"a"..="b"}], [{1..=2}]} +output : {'a':1, 'b':2} + + +ast : map_pick({}, []) +raw expr : map_pick(map(array(), array()), array()) +checked expr : map_pick(map(array<>(), array<>()), array<>()) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + +ast : map_pick({}, ['d']) +raw expr : map_pick(map(array(), array()), array('d')) +checked expr : map_pick(map(array<>(), array<>()), array("d")) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + +ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b']) +raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), array('a', 'b')) +checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), array("a", "b")) +optimized expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), ['a', 'b']) +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index c629b74e135e0..808b4d425bb91 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -167,5 +167,35 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2') ---- 1 +# Test map_pick function +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3']) +---- +{'k1':'v1','k3':NULL} + +query +SELECT map_pick({}, ['k1', 'k2', 'k3']) +---- +{} + +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, []) +---- +{} + +statement ok +CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null) + +statement ok +INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null) + +query +SELECT map_pick(col_str, ['k1', 'k3']), map_pick(col_str, []), map_pick(col_int, ['a', 'b', 'c']) +FROM map_pick_test +---- +{'k1':'v1','k3':NULL} {} {'a':10,'b':20} +{} {} {} +{} {} NULL + statement ok DROP DATABASE map_func_test From 9b16d06a9590282cc6187161853cb19cf14037da Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Thu, 23 May 2024 09:15:31 +0800 Subject: [PATCH 2/8] feat(functions): add factory function for map_pick --- src/query/functions/src/scalars/map.rs | 80 +++++++++++++++++++ src/query/functions/tests/it/scalars/map.rs | 7 +- .../tests/it/scalars/testdata/map.txt | 66 +++++---------- .../query/functions/02_0074_function_map.test | 13 +-- 4 files changed, 104 insertions(+), 62 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index b2ac194c63992..600ab08dc3e84 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -14,11 +14,16 @@ use std::collections::HashSet; use std::hash::Hash; +use std::sync::Arc; +use databend_common_expression::types::array::ArrayColumnBuilder; +use databend_common_expression::types::map::KvPair; use databend_common_expression::types::nullable::NullableDomain; +use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::BooleanType; +use databend_common_expression::types::DataType; use databend_common_expression::types::EmptyArrayType; use databend_common_expression::types::EmptyMapType; use databend_common_expression::types::GenericType; @@ -27,11 +32,20 @@ use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberType; use databend_common_expression::types::SimpleDomain; +use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::Column; +use databend_common_expression::EvalContext; +use databend_common_expression::Function; use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; +use databend_common_expression::FunctionSignature; +use databend_common_expression::Scalar; +use databend_common_expression::ScalarRef; use databend_common_expression::Value; +use databend_common_expression::ValueRef; use databend_common_hashtable::StackHashSet; use siphasher::sip128::Hasher128; use siphasher::sip128::SipHasher24; @@ -245,6 +259,72 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); + registry.register_function_factory("map_pick", |_, args_type: &[DataType]| { + if args_type.len() < 2 { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "map_pick".to_string(), + args_type: args_type.to_vec(), + return_type: args_type[0].clone(), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(move |_, _| FunctionDomain::Full), + eval: Box::new(map_pick_fn_vec), + }, + })) + }); + + fn map_pick_fn_vec(args: &[ValueRef], _: &mut EvalContext) -> Value { + let len = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + + let source_data_type = match args.first().unwrap() { + ValueRef::Scalar(s) => s.infer_data_type(), + ValueRef::Column(c) => c.data_type(), + }; + + let source_map = match &args[0] { + ValueRef::Scalar(ScalarRef::Map(s)) => { + KvPair::, GenericType<1>>::try_downcast_column(s).unwrap() + } + ValueRef::Column(Column::Map(c)) => { + KvPair::, GenericType<1>>::try_downcast_column(&c.values).unwrap() + } + _ => unreachable!(), + }; + + let mut builder: ArrayColumnBuilder, GenericType<1>>> = + ArrayType::create_builder( + args.len() - 1, + &source_data_type.as_map().unwrap().as_tuple().unwrap(), + ); + for key_arg in args[1..].iter() { + if let Some((k, v)) = source_map + .iter() + .find(|(k, _)| k == key_arg.as_scalar().unwrap()) + { + builder.put_item((k.clone(), v.clone())); + } + } + builder.commit_row(); + + match len { + Some(_) => Value::Column(Column::Map(Box::new(builder.build().upcast()))), + _ => { + let scalar_builder = builder.build_scalar(); + Value::Scalar(Scalar::Map(Column::Tuple(vec![ + scalar_builder.keys, + scalar_builder.values, + ]))) + } + } + } + registry.register_2_arg_core::( "map_pick", |_, _, _| FunctionDomain::Full, diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index df91c05b5aa41..20408c9d3450f 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -281,10 +281,7 @@ fn test_map_size(file: &mut impl Write) { } fn test_map_pick(file: &mut impl Write) { - run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]); - run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); - run_ast(file, "map_pick({}, [])", &[]); - run_ast(file, "map_pick({}, ['d'])", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]); let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), @@ -305,7 +302,7 @@ fn test_map_pick(file: &mut impl Write) { ]; run_ast( file, - "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])", + "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')", &columns, ); } diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 65dc4e9418294..3c782159a0c50 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -617,67 +617,37 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ -ast : map_pick({'a':1,'b':2,'c':3}, []) -raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array()) -checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), CAST(array<>() AS Array(String))) -optimized expr : {} -output type : Map(String, UInt8) -output domain : {} -output : {} - - -ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b']) -raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b')) -checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array("a", "b")) +ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b') +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), 'a', 'b') +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), "a", "b") optimized expr : {"a":1_u8, "b":2_u8} output type : Map(String, UInt8) output domain : {[{"a"..="b"}], [{1..=2}]} output : {'a':1, 'b':2} -ast : map_pick({}, []) -raw expr : map_pick(map(array(), array()), array()) -checked expr : map_pick(map(array<>(), array<>()), array<>()) -optimized expr : {} :: Map(Nothing) -output type : Map(Nothing) -output domain : {} -output : {} - - -ast : map_pick({}, ['d']) -raw expr : map_pick(map(array(), array()), array('d')) -checked expr : map_pick(map(array<>(), array<>()), array("d")) -optimized expr : {} :: Map(Nothing) -output type : Map(Nothing) -output domain : {} -output : {} - - -ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b']) -raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), array('a', 'b')) -checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), array("a", "b")) -optimized expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), ['a', 'b']) +ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b') +raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b') +checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "b") evaluation: +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ | | a_col | b_col | c_col | d_col | e_col | f_col | Output | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ | Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | | Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | -| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} | -| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} | -| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'b':'v2'} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 2] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index 808b4d425bb91..1e6b4d6abad66 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -169,17 +169,12 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2') # Test map_pick function query -SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3']) +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3') ---- {'k1':'v1','k3':NULL} query -SELECT map_pick({}, ['k1', 'k2', 'k3']) ----- -{} - -query -SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, []) +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '') ---- {} @@ -187,10 +182,10 @@ statement ok CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null) statement ok -INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null) +INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}) query -SELECT map_pick(col_str, ['k1', 'k3']), map_pick(col_str, []), map_pick(col_int, ['a', 'b', 'c']) +SELECT map_pick(col_str, 'k1', 'k3') FROM map_pick_test ---- {'k1':'v1','k3':NULL} {} {'a':10,'b':20} From 979fcd617f4a8da90a23e48d99358d7d0186f37c Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Fri, 24 May 2024 18:09:29 +0800 Subject: [PATCH 3/8] feat(functions): add more args_type check --- src/query/functions/src/scalars/map.rs | 50 +++++++++++++------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 600ab08dc3e84..1c0c723d18cab 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -264,6 +264,29 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } + if !matches!(args_type[0], DataType::Map(_) | DataType::EmptyMap) { + return None; + } + + let inner_key_type = match args_type.get(0) { + Some(DataType::Map(m)) => m.as_tuple().map(|tuple| &tuple[0]), + _ => None, + }; + let key_match = args_type[1..].iter().all(|arg_type| match inner_key_type { + Some(key_type) => arg_type == key_type, + None => matches!( + arg_type, + DataType::String + | DataType::Number(_) + | DataType::Decimal(_) + | DataType::Date + | DataType::Timestamp + ), + }); + if !key_match { + return None; + } + Some(Arc::new(Function { signature: FunctionSignature { name: "map_pick".to_string(), @@ -301,7 +324,7 @@ pub fn register(registry: &mut FunctionRegistry) { let mut builder: ArrayColumnBuilder, GenericType<1>>> = ArrayType::create_builder( args.len() - 1, - &source_data_type.as_map().unwrap().as_tuple().unwrap(), + source_data_type.as_map().unwrap().as_tuple().unwrap(), ); for key_arg in args[1..].iter() { if let Some((k, v)) = source_map @@ -336,29 +359,4 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::Full, |_, _, _| Value::Scalar(()), ); - - registry.register_passthrough_nullable_2_arg( - "map_pick", - |_, domain1, domain2| { - FunctionDomain::Domain(match (domain1, domain2) { - (Some(domain1), _) => Some(domain1).cloned(), - (None, _) => None, - }) - }, - vectorize_with_builder_2_arg::< - MapType, GenericType<1>>, - ArrayType>, - MapType, GenericType<1>>, - >(|map, keys, output_map, ctx| { - let mut picked_map_builder = ArrayType::create_builder(keys.len(), ctx.generics); - for key in keys.iter() { - if let Some((k, v)) = map.iter().find(|(k, _)| k == &key) { - picked_map_builder.put_item((k.clone(), v.clone())); - } - } - - picked_map_builder.commit_row(); - output_map.append_column(&picked_map_builder.build()); - }), - ); } From 0b75c3e8e89ee349229f4bc6648dd1e8f054723d Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Fri, 24 May 2024 23:34:52 +0800 Subject: [PATCH 4/8] feat(functions): add args arrayType --- src/query/functions/src/scalars/map.rs | 67 +++++++++++++------ src/query/functions/tests/it/scalars/map.rs | 1 + .../tests/it/scalars/testdata/map.txt | 9 +++ 3 files changed, 58 insertions(+), 19 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 1c0c723d18cab..27ffaa153f237 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -268,21 +268,35 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } - let inner_key_type = match args_type.get(0) { + let inner_key_type = match args_type.first() { Some(DataType::Map(m)) => m.as_tuple().map(|tuple| &tuple[0]), _ => None, }; - let key_match = args_type[1..].iter().all(|arg_type| match inner_key_type { - Some(key_type) => arg_type == key_type, - None => matches!( - arg_type, - DataType::String - | DataType::Number(_) - | DataType::Decimal(_) - | DataType::Date - | DataType::Timestamp - ), - }); + let key_match = match args_type.len() { + 2 => args_type.get(1).map_or(false, |t| match t { + DataType::Array(_) => inner_key_type.map_or(false, |key_type| { + t.as_array() + .map_or(false, |array| array.as_ref() == key_type) + }), + DataType::EmptyArray => false, + _ => false, + }), + _ => args_type.iter().skip(1).all(|arg_type| { + inner_key_type.map_or_else( + || { + matches!( + arg_type, + DataType::String + | DataType::Number(_) + | DataType::Decimal(_) + | DataType::Date + | DataType::Timestamp + ) + }, + |key_type| arg_type == key_type, + ) + }), + }; if !key_match { return None; } @@ -312,9 +326,18 @@ pub fn register(registry: &mut FunctionRegistry) { }; let source_map = match &args[0] { - ValueRef::Scalar(ScalarRef::Map(s)) => { - KvPair::, GenericType<1>>::try_downcast_column(s).unwrap() - } + ValueRef::Scalar(s) => match s { + ScalarRef::Map(cols) => { + KvPair::, GenericType<1>>::try_downcast_column(cols).unwrap() + } + ScalarRef::EmptyMap => { + KvPair::, GenericType<1>>::try_downcast_column( + &Column::EmptyMap { len: 0 }, + ) + .unwrap() + } + _ => unreachable!(), + }, ValueRef::Column(Column::Map(c)) => { KvPair::, GenericType<1>>::try_downcast_column(&c.values).unwrap() } @@ -326,11 +349,17 @@ pub fn register(registry: &mut FunctionRegistry) { args.len() - 1, source_data_type.as_map().unwrap().as_tuple().unwrap(), ); - for key_arg in args[1..].iter() { - if let Some((k, v)) = source_map + let select_keys = match &args[1] { + ValueRef::Scalar(ScalarRef::Array(arr)) if args.len() == 2 => { + arr.iter().collect::>() + } + _ => args[1..] .iter() - .find(|(k, _)| k == key_arg.as_scalar().unwrap()) - { + .map(|arg| arg.as_scalar().unwrap().clone()) + .collect::>(), + }; + for key_arg in select_keys { + if let Some((k, v)) = source_map.iter().find(|(k, _)| k == &key_arg) { builder.put_item((k.clone(), v.clone())); } } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 20408c9d3450f..0d152a37b4312 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -282,6 +282,7 @@ fn test_map_size(file: &mut impl Write) { fn test_map_pick(file: &mut impl Write) { run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 3c782159a0c50..2d69d39a09056 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -626,6 +626,15 @@ output domain : {[{"a"..="b"}], [{1..=2}]} output : {'a':1, 'b':2} +ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b']) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b')) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array("a", "b")) +optimized expr : {"a":1_u8, "b":2_u8} +output type : Map(String, UInt8) +output domain : {[{"a"..="b"}], [{1..=2}]} +output : {'a':1, 'b':2} + + ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b') raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b') checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "b") From e20075286ba3a6a1d7f962a5e4abdd6e54982e2f Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 4 Nov 2024 23:09:43 +0800 Subject: [PATCH 5/8] fix --- src/query/functions/src/scalars/map.rs | 267 +++++++++++--------- src/query/functions/tests/it/scalars/map.rs | 1 - 2 files changed, 141 insertions(+), 127 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 886c80ca3c4c8..49d934a3c6d60 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -17,8 +17,8 @@ use std::hash::Hash; use std::sync::Arc; use databend_common_expression::types::array::ArrayColumnBuilder; -use databend_common_expression::types::map::KvPair; use databend_common_expression::types::map::KvColumn; +use databend_common_expression::types::map::KvPair; use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; @@ -37,8 +37,8 @@ use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::Column; -use databend_common_expression::EvalContext; use databend_common_expression::ColumnBuilder; +use databend_common_expression::EvalContext; use databend_common_expression::Function; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionEval; @@ -66,11 +66,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_2_arg::>, ArrayType>, MapType, GenericType<1>>>( |keys, vals, output, ctx| { let key_type = &ctx.generics[0]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() { + if !check_valid_map_key_type(key_type) { ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } else if keys.len() != vals.len() { ctx.set_error(output.len(), format!( @@ -266,12 +262,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } else { let key_type = &args_type[1]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() - { + if !check_valid_map_key_type(key_type) { return None; } for arg_type in args_type.iter().skip(2) { @@ -302,19 +293,19 @@ pub fn register(registry: &mut FunctionRegistry) { let mut output_map_builder = ColumnBuilder::with_capacity(&return_type, input_length.unwrap_or(1)); + let mut delete_key_list = HashSet::new(); for idx in 0..(input_length.unwrap_or(1)) { - let input_map_sref = match &args[0] { + let input_map = match &args[0] { ValueRef::Scalar(map) => map.clone(), ValueRef::Column(map) => unsafe { map.index_unchecked(idx) }, }; - match &input_map_sref { + match &input_map { ScalarRef::Null | ScalarRef::EmptyMap => { output_map_builder.push_default(); } ScalarRef::Map(col) => { - let mut delete_key_list = HashSet::new(); - + delete_key_list.clear(); for input_key_item in args.iter().skip(1) { let input_key = match &input_key_item { ValueRef::Scalar(scalar) => scalar.clone(), @@ -326,7 +317,7 @@ pub fn register(registry: &mut FunctionRegistry) { delete_key_list.insert(input_key.to_owned()); } - let inner_builder_type = match input_map_sref.infer_data_type() { + let inner_builder_type = match input_map.infer_data_type() { DataType::Map(box typ) => typ, _ => unreachable!(), }; @@ -335,7 +326,7 @@ pub fn register(registry: &mut FunctionRegistry) { ColumnBuilder::with_capacity(&inner_builder_type, col.len()); let input_map: KvColumn = - MapType::try_downcast_scalar(&input_map_sref).unwrap(); + MapType::try_downcast_scalar(&input_map).unwrap(); input_map.iter().for_each(|(map_key, map_value)| { if !delete_key_list.contains(&map_key.to_owned()) { @@ -382,42 +373,51 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } - if !matches!(args_type[0], DataType::Map(_) | DataType::EmptyMap) { - return None; - } - - let inner_key_type = match args_type.first() { - Some(DataType::Map(m)) => m.as_tuple().map(|tuple| &tuple[0]), - _ => None, + let map_key_type = match args_type[0].remove_nullable() { + DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { + Some(type_tuple[0].clone()) + } + DataType::EmptyMap => None, + _ => return None, }; - let key_match = match args_type.len() { - 2 => args_type.get(1).map_or(false, |t| match t { - DataType::Array(_) => inner_key_type.map_or(false, |key_type| { - t.as_array() - .map_or(false, |array| array.as_ref() == key_type) - }), - DataType::EmptyArray => false, - _ => false, - }), - _ => args_type.iter().skip(1).all(|arg_type| { - inner_key_type.map_or_else( - || { - matches!( - arg_type, - DataType::String - | DataType::Number(_) - | DataType::Decimal(_) - | DataType::Date - | DataType::Timestamp - ) - }, - |key_type| arg_type == key_type, - ) - }), + + // the second argument can be an array of keys. + let (is_array, array_key_type) = match args_type[1].remove_nullable() { + DataType::Array(box key_type) => (true, Some(key_type.clone())), + DataType::EmptyArray => (true, None), + _ => (false, None), }; - if !key_match { - return None; + + if let Some(map_key_type) = map_key_type { + if is_array { + if args_type.len() != 2 || map_key_type != array_key_type { + return None; + } + } else { + for arg_type in args_type.iter().skip(1) { + if arg_type != &map_key_type { + return None; + } + } + } + } else { + if is_array { + if args_type.len() != 2 || !check_valid_map_key_type(array_key_type) { + return None; + } + } else { + let key_type = &args_type[1]; + if !check_valid_map_key_type(key_type) { + return None; + } + for arg_type in args_type.iter().skip(2) { + if arg_type != key_type { + return None; + } + } + } } + let return_type = args_type[0].clone(); Some(Arc::new(Function { signature: FunctionSignature { @@ -426,84 +426,99 @@ pub fn register(registry: &mut FunctionRegistry) { return_type: args_type[0].clone(), }, eval: FunctionEval::Scalar { - calc_domain: Box::new(move |_, _| FunctionDomain::Full), - eval: Box::new(map_pick_fn_vec), - }, - })) - }); + calc_domain: Box::new(|_, args_domain| { + FunctionDomain::Domain(args_domain[0].clone()) + }), + eval: Box::new(move |args, _ctx| { + let input_length = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); - fn map_pick_fn_vec(args: &[ValueRef], _: &mut EvalContext) -> Value { - let len = args.iter().find_map(|arg| match arg { - ValueRef::Column(col) => Some(col.len()), - _ => None, - }); + let mut output_map_builder = + ColumnBuilder::with_capacity(&return_type, input_length.unwrap_or(1)); - let source_data_type = match args.first().unwrap() { - ValueRef::Scalar(s) => s.infer_data_type(), - ValueRef::Column(c) => c.data_type(), - }; + let mut pick_key_list = HashSet::new(); + for idx in 0..(input_length.unwrap_or(1)) { + let input_map = match &args[0] { + ValueRef::Scalar(map) => map.clone(), + ValueRef::Column(map) => unsafe { map.index_unchecked(idx) }, + }; - let source_map = match &args[0] { - ValueRef::Scalar(s) => match s { - ScalarRef::Map(cols) => { - KvPair::, GenericType<1>>::try_downcast_column(cols).unwrap() - } - ScalarRef::EmptyMap => { - KvPair::, GenericType<1>>::try_downcast_column( - &Column::EmptyMap { len: 0 }, - ) - .unwrap() - } - _ => unreachable!(), - }, - ValueRef::Column(Column::Map(c)) => { - KvPair::, GenericType<1>>::try_downcast_column(&c.values).unwrap() - } - _ => unreachable!(), - }; + match &input_map { + ScalarRef::Null | ScalarRef::EmptyMap => { + output_map_builder.push_default(); + } + ScalarRef::Map(col) => { + pick_key_list.clear(); + for input_key_item in args.iter().skip(1) { + let input_key = match &input_key_item { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { + col.index_unchecked(idx) + }, + }; + match input_key { + ScalarRef::EmptyArray | ScalarRef::Null => {} + ScalarRef::Array(arr_col) => { + for arr_key in arr_col.iter() { + if arr_key == ScalarRef::Null { + continue; + } + pick_key_list.insert(arr_key.to_owned()); + } + } + _ => { + pick_key_list.insert(input_key.to_owned()); + } + } + } - let mut builder: ArrayColumnBuilder, GenericType<1>>> = - ArrayType::create_builder( - args.len() - 1, - source_data_type.as_map().unwrap().as_tuple().unwrap(), - ); - let select_keys = match &args[1] { - ValueRef::Scalar(ScalarRef::Array(arr)) if args.len() == 2 => { - arr.iter().collect::>() - } - _ => args[1..] - .iter() - .map(|arg| arg.as_scalar().unwrap().clone()) - .collect::>(), - }; - for key_arg in select_keys { - if let Some((k, v)) = source_map.iter().find(|(k, _)| k == &key_arg) { - builder.put_item((k.clone(), v.clone())); - } - } - builder.commit_row(); - - match len { - Some(_) => Value::Column(Column::Map(Box::new(builder.build().upcast()))), - _ => { - let scalar_builder = builder.build_scalar(); - Value::Scalar(Scalar::Map(Column::Tuple(vec![ - scalar_builder.keys, - scalar_builder.values, - ]))) - } - } - } + let inner_builder_type = match input_map.infer_data_type() { + DataType::Map(box typ) => typ, + _ => unreachable!(), + }; - registry.register_2_arg_core::( - "map_pick", - |_, _, _| FunctionDomain::Full, - |_, _, _| Value::Scalar(()), - ); + let mut filtered_kv_builder = + ColumnBuilder::with_capacity(&inner_builder_type, col.len()); - registry.register_2_arg_core::>, EmptyMapType, _, _>( - "map_pick", - |_, _, _| FunctionDomain::Full, - |_, _, _| Value::Scalar(()), - ); + let input_map: KvColumn = + MapType::try_downcast_scalar(&input_map).unwrap(); + + input_map.iter().for_each(|(map_key, map_value)| { + if pick_key_list.contains(&map_key.to_owned()) { + filtered_kv_builder.push(ScalarRef::Tuple(vec![ + map_key.clone(), + map_value.clone(), + ])); + } + }); + output_map_builder + .push(ScalarRef::Map(filtered_kv_builder.build())); + } + _ => unreachable!(), + } + } + + match input_length { + Some(_) => Value::Column(output_map_builder.build()), + None => Value::Scalar(output_map_builder.build_scalar()), + } + }), + }, + })) + }); +} + +fn check_valid_map_key_type(key_type: &DataType) -> bool { + if key_type.is_boolean() + || key_type.is_string() + || key_type.is_numeric() + || key_type.is_decimal() + || key_type.is_date_or_date_time() + { + true + } else { + false + } } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index b30f1118ce942..ca36bc3c66ade 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -410,4 +410,3 @@ fn test_map_pick(file: &mut impl Write) { &columns, ); } - From 90d05d6161f607a0a5d661f4cbe6fd68ebf29dc5 Mon Sep 17 00:00:00 2001 From: b41sh Date: Mon, 4 Nov 2024 23:52:12 +0800 Subject: [PATCH 6/8] fix --- src/query/functions/src/scalars/map.rs | 173 ++++++++---------- src/query/functions/tests/it/scalars/map.rs | 9 + .../it/scalars/testdata/function_list.txt | 4 +- 3 files changed, 88 insertions(+), 98 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 49d934a3c6d60..8b6eff74c96d4 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -16,9 +16,7 @@ use std::collections::HashSet; use std::hash::Hash; use std::sync::Arc; -use databend_common_expression::types::array::ArrayColumnBuilder; use databend_common_expression::types::map::KvColumn; -use databend_common_expression::types::map::KvPair; use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; @@ -36,15 +34,12 @@ use databend_common_expression::types::SimpleDomain; use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; -use databend_common_expression::EvalContext; use databend_common_expression::Function; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; -use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use databend_common_expression::Value; use databend_common_expression::ValueRef; @@ -242,38 +237,7 @@ pub fn register(registry: &mut FunctionRegistry) { ); registry.register_function_factory("map_delete", |_, args_type| { - if args_type.len() < 2 { - return None; - } - - let map_key_type = match args_type[0].remove_nullable() { - DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { - Some(type_tuple[0].clone()) - } - DataType::EmptyMap => None, - _ => return None, - }; - - if let Some(map_key_type) = map_key_type { - for arg_type in args_type.iter().skip(1) { - if arg_type != &map_key_type { - return None; - } - } - } else { - let key_type = &args_type[1]; - if !check_valid_map_key_type(key_type) { - return None; - } - for arg_type in args_type.iter().skip(2) { - if arg_type != key_type { - return None; - } - } - } - - let return_type = args_type[0].clone(); - + let return_type = check_map_arg_types(args_type)?; Some(Arc::new(Function { signature: FunctionSignature { name: "map_delete".to_string(), @@ -313,8 +277,20 @@ pub fn register(registry: &mut FunctionRegistry) { col.index_unchecked(idx) }, }; - - delete_key_list.insert(input_key.to_owned()); + match input_key { + ScalarRef::EmptyArray | ScalarRef::Null => {} + ScalarRef::Array(arr_col) => { + for arr_key in arr_col.iter() { + if arr_key == ScalarRef::Null { + continue; + } + delete_key_list.insert(arr_key.to_owned()); + } + } + _ => { + delete_key_list.insert(input_key.to_owned()); + } + } } let inner_builder_type = match input_map.infer_data_type() { @@ -369,56 +345,7 @@ pub fn register(registry: &mut FunctionRegistry) { ); registry.register_function_factory("map_pick", |_, args_type: &[DataType]| { - if args_type.len() < 2 { - return None; - } - - let map_key_type = match args_type[0].remove_nullable() { - DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { - Some(type_tuple[0].clone()) - } - DataType::EmptyMap => None, - _ => return None, - }; - - // the second argument can be an array of keys. - let (is_array, array_key_type) = match args_type[1].remove_nullable() { - DataType::Array(box key_type) => (true, Some(key_type.clone())), - DataType::EmptyArray => (true, None), - _ => (false, None), - }; - - if let Some(map_key_type) = map_key_type { - if is_array { - if args_type.len() != 2 || map_key_type != array_key_type { - return None; - } - } else { - for arg_type in args_type.iter().skip(1) { - if arg_type != &map_key_type { - return None; - } - } - } - } else { - if is_array { - if args_type.len() != 2 || !check_valid_map_key_type(array_key_type) { - return None; - } - } else { - let key_type = &args_type[1]; - if !check_valid_map_key_type(key_type) { - return None; - } - for arg_type in args_type.iter().skip(2) { - if arg_type != key_type { - return None; - } - } - } - } - let return_type = args_type[0].clone(); - + let return_type = check_map_arg_types(args_type)?; Some(Arc::new(Function { signature: FunctionSignature { name: "map_pick".to_string(), @@ -510,15 +437,71 @@ pub fn register(registry: &mut FunctionRegistry) { }); } +// Check map function arg types +// 1. The first arg must be a Map or EmptyMap. +// 2. The second arg can be an Array or EmptyArray. +// 3. Multiple args with same key type is also valid. +fn check_map_arg_types(args_type: &[DataType]) -> Option { + if args_type.len() < 2 { + return None; + } + + let map_key_type = match args_type[0].remove_nullable() { + DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { + Some(type_tuple[0].clone()) + } + DataType::EmptyMap => None, + _ => return None, + }; + + // the second argument can be an array of keys. + let (is_array, array_key_type) = match args_type[1].remove_nullable() { + DataType::Array(box key_type) => (true, Some(key_type.clone())), + DataType::EmptyArray => (true, None), + _ => (false, None), + }; + if is_array && args_type.len() != 2 { + return None; + } + if let Some(map_key_type) = map_key_type { + if is_array { + if let Some(array_key_type) = array_key_type { + if map_key_type != array_key_type { + return None; + } + } + } else { + for arg_type in args_type.iter().skip(1) { + if arg_type != &map_key_type { + return None; + } + } + } + } else if is_array { + if let Some(array_key_type) = array_key_type { + if !check_valid_map_key_type(&array_key_type) { + return None; + } + } + } else { + let key_type = &args_type[1]; + if !check_valid_map_key_type(key_type) { + return None; + } + for arg_type in args_type.iter().skip(2) { + if arg_type != key_type { + return None; + } + } + } + let return_type = args_type[0].clone(); + Some(return_type) +} + fn check_valid_map_key_type(key_type: &DataType) -> bool { - if key_type.is_boolean() + key_type.is_boolean() || key_type.is_string() || key_type.is_numeric() || key_type.is_decimal() || key_type.is_date_or_date_time() - { - true - } else { - false - } } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index ca36bc3c66ade..7b2f0f0fd6d46 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -297,6 +297,11 @@ fn test_map_delete(file: &mut impl Write) { "map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, 'k3', 'k2')", &[], ); + run_ast( + file, + "map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2'])", + &[], + ); // Deleting keys from a nested map let columns = [ @@ -386,6 +391,10 @@ fn test_map_delete(file: &mut impl Write) { fn test_map_pick(file: &mut impl Write) { run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]); run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]); + run_ast(file, "map_pick({1:'a',2:'b',3:'c'}, 1, 3)", &[]); + run_ast(file, "map_pick({}, 'a', 'b')", &[]); + run_ast(file, "map_pick({}, [])", &[]); let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 550334abfad0f..88040668a6126 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2543,13 +2543,11 @@ Functions overloads: 0 map_contains_key(Map(Nothing), T0) :: Boolean 1 map_contains_key(Map(T0, T1), T0) :: Boolean 2 map_contains_key(Map(T0, T1) NULL, T0 NULL) :: Boolean NULL -0 map_pick(Map(Nothing), Array(Nothing)) :: Map(Nothing) -1 map_pick(Map(T0, T1), Array(T0)) :: Map(T0, T1) -2 map_pick(Map(T0, T1) NULL, Array(T0) NULL) :: Map(T0, T1) NULL 0 map_delete FACTORY 0 map_keys(Map(Nothing)) :: Array(Nothing) 1 map_keys(Map(T0, T1)) :: Array(T0) 2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL +0 map_pick FACTORY 0 map_size(Map(Nothing)) :: UInt8 1 map_size(Map(T0, T1)) :: UInt64 2 map_size(Map(T0, T1) NULL) :: UInt64 NULL From 12694f6e8a547a0a1d1391f5a456165e09b89d00 Mon Sep 17 00:00:00 2001 From: b41sh Date: Tue, 5 Nov 2024 09:44:44 +0800 Subject: [PATCH 7/8] fix tests --- src/query/functions/src/scalars/map.rs | 26 ++++--- .../tests/it/scalars/testdata/map.txt | 71 +++++++++++++++---- .../query/functions/02_0074_function_map.test | 35 +++++++-- 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 8b6eff74c96d4..a71e2aadda59c 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -292,6 +292,10 @@ pub fn register(registry: &mut FunctionRegistry) { } } } + if delete_key_list.is_empty() { + output_map_builder.push(input_map); + continue; + } let inner_builder_type = match input_map.infer_data_type() { DataType::Map(box typ) => typ, @@ -400,6 +404,10 @@ pub fn register(registry: &mut FunctionRegistry) { } } } + if pick_key_list.is_empty() { + output_map_builder.push_default(); + continue; + } let inner_builder_type = match input_map.infer_data_type() { DataType::Map(box typ) => typ, @@ -456,7 +464,7 @@ fn check_map_arg_types(args_type: &[DataType]) -> Option { // the second argument can be an array of keys. let (is_array, array_key_type) = match args_type[1].remove_nullable() { - DataType::Array(box key_type) => (true, Some(key_type.clone())), + DataType::Array(box key_type) => (true, Some(key_type.remove_nullable())), DataType::EmptyArray => (true, None), _ => (false, None), }; @@ -466,30 +474,28 @@ fn check_map_arg_types(args_type: &[DataType]) -> Option { if let Some(map_key_type) = map_key_type { if is_array { if let Some(array_key_type) = array_key_type { - if map_key_type != array_key_type { + if array_key_type != DataType::Null && array_key_type != map_key_type { return None; } } } else { for arg_type in args_type.iter().skip(1) { - if arg_type != &map_key_type { + let arg_type = arg_type.remove_nullable(); + if arg_type != DataType::Null && arg_type != map_key_type { return None; } } } } else if is_array { if let Some(array_key_type) = array_key_type { - if !check_valid_map_key_type(&array_key_type) { + if array_key_type != DataType::Null && !check_valid_map_key_type(&array_key_type) { return None; } } } else { - let key_type = &args_type[1]; - if !check_valid_map_key_type(key_type) { - return None; - } - for arg_type in args_type.iter().skip(2) { - if arg_type != key_type { + for arg_type in args_type.iter().skip(1) { + let arg_type = arg_type.remove_nullable(); + if arg_type != DataType::Null && !check_valid_map_key_type(&arg_type) { return None; } } diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 3766a4fddec13..9201a7ad3b00d 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -578,6 +578,15 @@ output domain : {[{"k1"..="k4"}], [{"v1"..="v4"}]} output : {'k1':'v1', 'k4':'v4'} +ast : map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2']) +raw expr : map_delete(map(array('k1', 'k2', 'k3', 'k4'), array('v1', 'v2', 'v3', 'v4')), array('k3', 'k2')) +checked expr : map_delete(map(array("k1", "k2", "k3", "k4"), array("v1", "v2", "v3", "v4")), array("k3", "k2")) +optimized expr : {"k1":"v1", "k4":"v4"} +output type : Map(String, String) +output domain : {[{"k1"..="k4"}], [{"v1"..="v4"}]} +output : {'k1':'v1', 'k4':'v4'} + + ast : map_delete(map([a_col, b_col], [d_col, e_col]), 'a_k2', 'b_k3') raw expr : map_delete(map(array(a_col::String, b_col::String), array(d_col::String, e_col::String)), 'a_k2', 'b_k3') checked expr : map_delete(map(array(a_col, b_col), array(d_col, e_col)), "a_k2", "b_k3") @@ -801,6 +810,42 @@ output domain : {[{"a"..="b"}], [{1..=2}]} output : {'a':1, 'b':2} +ast : map_pick({'a':1,'b':2,'c':3}, []) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array()) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array<>()) +optimized expr : {} +output type : Map(String, UInt8) +output domain : {} +output : {} + + +ast : map_pick({1:'a',2:'b',3:'c'}, 1, 3) +raw expr : map_pick(map(array(1, 2, 3), array('a', 'b', 'c')), 1, 3) +checked expr : map_pick(map(array(1_u8, 2_u8, 3_u8), array("a", "b", "c")), 1_u8, 3_u8) +optimized expr : {1_u8:"a", 3_u8:"c"} +output type : Map(UInt8, String) +output domain : {[{1..=3}], [{"a"..="c"}]} +output : {1:'a', 3:'c'} + + +ast : map_pick({}, 'a', 'b') +raw expr : map_pick(map(array(), array()), 'a', 'b') +checked expr : map_pick(map(array<>(), array<>()), "a", "b") +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + +ast : map_pick({}, []) +raw expr : map_pick(map(array(), array()), array()) +checked expr : map_pick(map(array<>(), array<>()), array<>()) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b') raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b') checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "b") @@ -810,19 +855,21 @@ evaluation: +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ | Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | | Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | -| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'b':'v2'} | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 2] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index f28e3df7cf410..5fc29b96f1f8d 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -144,6 +144,15 @@ SELECT MAP_DELETE( ---- {'k1':'v1','k4':'v4'} +query +SELECT MAP_DELETE({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2', 'k10']) +---- +{'k1':'v1','k4':'v4'} + +query +SELECT MAP_DELETE({}, 'k1', 'k2') +---- +{} # Deleting keys from a nested map statement ok @@ -230,11 +239,29 @@ SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3') ---- {'k1':'v1','k3':NULL} +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3', 'k10']) +---- +{'k1':'v1','k3':NULL} + query SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '') ---- {} +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, []) +---- +{} + +query +SELECT map_pick({}, 1, 2) +---- +{} + +statement ok +DROP TABLE IF EXISTS map_pick_test + statement ok CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null) @@ -242,12 +269,10 @@ statement ok INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}) query -SELECT map_pick(col_str, 'k1', 'k3') -FROM map_pick_test +SELECT map_pick(col_str, 'k1', 'k3'), map_pick(col_int, ['a', 'e', 'x']) FROM map_pick_test ---- -{'k1':'v1','k3':NULL} {} {'a':10,'b':20} -{} {} {} -{} {} NULL +{'k1':'v1','k3':NULL} {'a':10} +{} {'e':NULL} # Test map_filter query T From f3fee9114142e9650c257ab82555d6f2891e2205 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 5 Nov 2024 10:19:55 +0800 Subject: [PATCH 8/8] fix tests --- .../functions/tests/it/scalars/testdata/map.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 9201a7ad3b00d..e8c3d262cae0c 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -553,12 +553,13 @@ error: -error: - --> SQL:1:1 - | -1 | map_delete({}, NULL, NULL) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ no function matches signature `map_delete(Map(Nothing), NULL, NULL)`, you might need to add explicit type casts. - +ast : map_delete({}, NULL, NULL) +raw expr : map_delete(map(array(), array()), NULL, NULL) +checked expr : map_delete(map(array<>(), array<>()), NULL, NULL) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} error: