diff --git a/be/src/exprs/bitmap_functions.cpp b/be/src/exprs/bitmap_functions.cpp
index 7e5150355bede..c1f6aee0d78e7 100644
--- a/be/src/exprs/bitmap_functions.cpp
+++ b/be/src/exprs/bitmap_functions.cpp
@@ -484,7 +484,7 @@ StatusOr BitmapFunctions::bitmap_min(FunctionContext* context, const
     return builder.build(ColumnHelper::is_all_const(columns));
 }
 
-StatusOr BitmapFunctions::base64_to_bitmap(FunctionContext* context, const starrocks::Columns& columns) {
+StatusOr BitmapFunctions::base64_to_bitmap(FunctionContext* context, const Columns& columns) {
     ColumnViewer viewer(columns[0]);
     size_t size = columns[0]->size();
     ColumnBuilder builder(size);
@@ -675,4 +675,67 @@ StatusOr BitmapFunctions::bitmap_subset_in_range(FunctionContext* con
     return builder.build(ColumnHelper::is_all_const(columns));
 }
 
+// Serializes each input bitmap into its binary representation.
+// A NULL input row yields a NULL output row. Returns InternalError when the built
+// column reports (via capacity_limit_reached) that it has hit its capacity limit.
+StatusOr BitmapFunctions::bitmap_to_binary(FunctionContext* context, const starrocks::Columns& columns) {
+    ColumnViewer viewer(columns[0]);
+
+    size_t size = columns[0]->size();
+    ColumnBuilder builder(size);
+
+    // Scratch buffer declared outside the loop so every row reuses it and only resizes.
+    raw::RawString buf;
+    for (size_t row = 0; row < size; ++row) {
+        if (viewer.is_null(row)) {
+            builder.append_null();
+            continue;
+        }
+
+        BitmapValue* bitmap = viewer.value(row);
+        size_t serialize_size = bitmap->getSizeInBytes();
+        buf.resize(serialize_size);
+        bitmap->write(buf.data());
+        builder.append(Slice(buf.data(), serialize_size));
+    }
+
+    ColumnPtr col = builder.build(ColumnHelper::is_all_const(columns));
+    std::string err_msg;
+    if (col->capacity_limit_reached(&err_msg)) {
+        return Status::InternalError(
+                strings::Substitute("Size of binary column generated by bitmap_to_binary reaches limit: $0", err_msg));
+    }
+    return col;
+}
+
+// Deserializes each binary input value into a bitmap.
+// A NULL row, an empty value, or a value rejected by valid_and_deserialize yields NULL.
+StatusOr BitmapFunctions::bitmap_from_binary(FunctionContext* context, const Columns& columns) {
+    ColumnViewer viewer(columns[0]);
+    size_t size = columns[0]->size();
+    ColumnBuilder builder(size);
+
+    for (size_t row = 0; row < size; ++row) {
+        if (viewer.is_null(row)) {
+            builder.append_null();
+            continue;
+        }
+
+        auto src_value = viewer.value(row);
+        if (src_value.size == 0) {
+            builder.append_null();
+            continue;
+        }
+
+        BitmapValue bitmap;
+        bool res = bitmap.valid_and_deserialize(src_value.data, src_value.size);
+        if (!res) {
+            builder.append_null();
+        } else {
+            builder.append(std::move(bitmap));
+        }
+    }
+    return builder.build(ColumnHelper::is_all_const(columns));
+}
+
 } // namespace starrocks
diff --git a/be/src/exprs/bitmap_functions.h b/be/src/exprs/bitmap_functions.h
index 6cdde2c8383c5..9feca4a6d07e2 100644
--- a/be/src/exprs/bitmap_functions.h
+++ b/be/src/exprs/bitmap_functions.h
@@ -175,6 +175,20 @@ class BitmapFunctions {
      * @return TYPE_OBJECT
      */
     DEFINE_VECTORIZED_FN(bitmap_subset_limit);
+
+    /**
+     * @param
+     * @paramType columns: [TYPE_OBJECT]
+     * @return TYPE_VARBINARY
+     */
+    DEFINE_VECTORIZED_FN(bitmap_to_binary);
+
+    /**
+     * @param
+     * @paramType columns: [TYPE_VARBINARY]
+     * @return TYPE_OBJECT
+     */
+    DEFINE_VECTORIZED_FN(bitmap_from_binary);
 };
 
 } // namespace starrocks
diff --git a/be/src/types/bitmap_value.h b/be/src/types/bitmap_value.h
index 74e8a64b0b61c..a2b86bcf3c1f8 100644
--- a/be/src/types/bitmap_value.h
+++ b/be/src/types/bitmap_value.h
@@ -203,7 +203,10 @@ class BitmapValue {
 private:
     void _from_bitmap_to_smaller_type();
     void _from_set_to_bitmap();
-    inline void _copy_on_write() {
+
+    // The implementation of this function must be placed in the header (.h);
+    // otherwise it cannot be inlined, which hurts the performance of BitmapValue::add.
+ ALWAYS_INLINE void _copy_on_write() { if (UNLIKELY(_bitmap == nullptr)) { _bitmap = std::make_shared(); return; diff --git a/gensrc/script/functions.py b/gensrc/script/functions.py index 7e993b9811ded..0621229274ee7 100644 --- a/gensrc/script/functions.py +++ b/gensrc/script/functions.py @@ -426,18 +426,18 @@ [50231, 'to_days', 'INT', ['DATE'], 'TimeFunctions::to_days'], [50241, 'date_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::datetime_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'], [50242, 'date_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::date_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'], - + # From string to DATE/DATETIME # the function will call by FE getStrToDateFunction, and is invisible to user [50240, 'str_to_date', 'DATETIME', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str_to_date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'], [50243, 'str2date', 'DATE', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str2date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'], - + # Joda Time parse & format - [50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'], - 'TimeFunctions::parse_jodatime', - 'TimeFunctions::parse_joda_prepare', + [50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'], + 'TimeFunctions::parse_jodatime', + 'TimeFunctions::parse_joda_prepare', 'TimeFunctions::parse_joda_close'], - + [50260, 'jodatime_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::jodadatetime_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'], [50261, 'jodatime_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::jodadate_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'], @@ -630,6 +630,8 @@ [91000, 'sub_bitmap', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::sub_bitmap', False], [91001, 'bitmap_subset_limit', 'BITMAP', ['BITMAP', 
'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_limit', False], [91002, 'bitmap_subset_in_range', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_in_range', False], + [91003, 'bitmap_to_binary', 'VARBINARY', ['BITMAP'], 'BitmapFunctions::bitmap_to_binary', False], + [91004, 'bitmap_from_binary', 'BITMAP', ['VARBINARY'], 'BitmapFunctions::bitmap_from_binary', False], # hash function [100010, 'murmur_hash3_32', 'INT', ['VARCHAR', '...'], 'HashFunctions::murmur_hash3_32'], diff --git a/test/sql/test_bitmap_functions/R/test_bitmap_functions b/test/sql/test_bitmap_functions/R/test_bitmap_functions index 7b053f35a57a8..e3de8b99be511 100644 --- a/test/sql/test_bitmap_functions/R/test_bitmap_functions +++ b/test/sql/test_bitmap_functions/R/test_bitmap_functions @@ -279,4 +279,132 @@ None 1 1,2 1,2 +-- !result +-- name: test_bitmap_binary +CREATE TABLE `t1` ( + `c1` int(11) NULL COMMENT "", + `c2` bitmap BITMAP_UNION NULL COMMENT "" +) ENGINE=OLAP +AGGREGATE KEY(`c1`) +DISTRIBUTED BY HASH(`c1`) BUCKETS 1 +PROPERTIES ("replication_num" = "1"); +-- result: +-- !result +insert into t1 values (1, bitmap_empty()); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 00 +-- !result +select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 0 +-- !result +truncate table t1; +-- result: +-- !result +insert into t1 values (1, to_bitmap(1)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 0101000000 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 1 +-- !result +truncate table t1; +-- result: +-- !result +insert into t1 values (1, to_bitmap(17179869184)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 030000000004000000 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 17179869184 +-- !result 
+truncate table t1; +-- result: +-- !result +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 0A0500000001000000000000000200000000000000030000000000000004000000000000000500000000000000 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 1,2,3,4,5 +-- !result +truncate table t1; +-- result: +-- !result +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 023B3000000100002700010001002700 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40 +-- !result +truncate table t1; +-- result: +-- !result +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20)); +-- result: +-- !result +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1; +-- result: +1 0402000000003A3000000100000000001300100000000100020003000400050006000700080009000A000B000C000D000E000F0010001100120013001400040000003B3000000100006400010000006400 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; +-- result: +1 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17179869184,17179869185,17179869186,17179869187,17179869188,17179869189,17179869190,17179869191,17179869192,17179869193,17179869194,17179869195,17179869196,17179869197,17179869198,17179869199,17179869200,17179869201,17179869202,17179869203,17179869204,17179869205,17179869206,17179869207,17179869208,17179869209,17179869210,17179869211,17179869212,17179869213,17179869214,17179869215,17179869216,17179869217,17179869218,17179869219,17179869220,17179869221,17179869222,17179869223,17179869224,17179869225,17179869226,17179869227,17179869228,17179869229,17179869230,17179869231,17179869232,17179869233,17179869234,17179869235,17179869236,17179869237,17179869238,17179869239,17179869240,17179869241,17179869242,17179869243,17179869244,17179869245,17179869246,17179869247,17179869248,17179869249,17179869250,17179869251,17179869252,17179869253,17179869254,17179869255,17179869256,17179869257,17179869258,17179869259,17179869260,17179869261,17179869262,17179869263,17179869264,17179869265,17179869266,17179869267,17179869268,17179869269,17179869270,17179869271,17179869272,17179869273,17179869274,17179869275,17179869276,17179869277,17179869278,17179869279,17179869280,17179869281,17179869282,17179869283,17179869284 +-- !result +truncate table t1; +-- result: +-- !result +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80)); +-- result: +-- !result +insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200)); +-- result: +-- !result +insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910)); +-- result: +-- !result +select c1, hex(bitmap_to_binary(c2)) from t1 order by c1; +-- result: +1 023B3000000100004F00010001004F00 +2 023B300000010000D20002000100C70084030A00 +-- !result +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1; +-- result: +1 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80 +2 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,900,901,902,903,904,905,906,907,908,909,910 +-- !result +select bitmap_from_binary(to_binary("1234")); +-- result: +None +-- !result +select bitmap_from_binary(to_binary("")); +-- result: +None +-- !result +select bitmap_from_binary(null); +-- result: +None -- !result \ No newline at end of file diff --git a/test/sql/test_bitmap_functions/T/test_bitmap_functions b/test/sql/test_bitmap_functions/T/test_bitmap_functions index b351d4d05eef4..6eb74555b5fc5 100644 --- a/test/sql/test_bitmap_functions/T/test_bitmap_functions +++ b/test/sql/test_bitmap_functions/T/test_bitmap_functions @@ -69,3 +69,62 @@ select bitmap_to_string(bitmap_subset_in_range(v1, 3, 2)) from test_bitmap_table select bitmap_to_string(bitmap_subset_in_range(v1, 2, 3)) from test_bitmap_table1 order by k1; select bitmap_to_string(bitmap_subset_in_range(v1, -2, 3)) from test_bitmap_table1 order by k1; +-- name: test_bitmap_binary +CREATE TABLE `t1` ( + `c1` int(11) NULL COMMENT "", + `c2` bitmap BITMAP_UNION NULL COMMENT "" +) ENGINE=OLAP +AGGREGATE KEY(`c1`) +DISTRIBUTED BY HASH(`c1`) BUCKETS 
1 +PROPERTIES ("replication_num" = "1"); + +-- empty bitmap +insert into t1 values (1, bitmap_empty()); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- single 32bit bitmap +truncate table t1; +insert into t1 values (1, to_bitmap(1)); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- single 64bit bitmap +truncate table t1; +insert into t1 values (1, to_bitmap(17179869184)); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- set bitmap +truncate table t1; +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5)); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- RoaringBitmap32 +truncate table t1; +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40)); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- RoaringBitmap64 +truncate table t1; +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20)); +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284)); +select c1, hex(bitmap_to_binary(c2)) from t1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1; + +-- Buf Resize +truncate table t1; +insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80)); +insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200)); +insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910)); +select c1, hex(bitmap_to_binary(c2)) from t1 order by c1; +select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 
order by c1; + +-- Invalid format +select bitmap_from_binary(to_binary("1234")); +select bitmap_from_binary(to_binary("")); + +-- Input is null +select bitmap_from_binary(null);