Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add function bitmap_from_binary and bitmap_to_binary #35621

Merged
merged 2 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion be/src/exprs/bitmap_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_min(FunctionContext* context, const
return builder.build(ColumnHelper::is_all_const(columns));
}

StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const starrocks::Columns& columns) {
StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const Columns& columns) {
ColumnViewer<TYPE_VARCHAR> viewer(columns[0]);
size_t size = columns[0]->size();
ColumnBuilder<TYPE_OBJECT> builder(size);
Expand Down Expand Up @@ -675,4 +675,57 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_subset_in_range(FunctionContext* con
return builder.build(ColumnHelper::is_all_const(columns));
}

// Serializes every bitmap of columns[0] into a binary value.
//
// @param context  not used by this function.
// @param columns  columns[0] is read through ColumnViewer<TYPE_OBJECT>.
// @return a TYPE_VARBINARY column with one serialized bitmap per input row
//         (NULL for NULL input rows), or Status::InternalError when the
//         built column reports that its capacity limit has been reached.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_to_binary(FunctionContext* context, const starrocks::Columns& columns) {
    ColumnViewer<TYPE_OBJECT> viewer(columns[0]);

    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_VARBINARY> builder(size);

    // Scratch buffer declared outside the loop so it is reused for every row;
    // it is resized to the exact serialized size of the current bitmap.
    raw::RawString buf;
    for (size_t row = 0; row < size; ++row) {
        // NULL in -> NULL out, consistent with bitmap_from_binary. Without this
        // guard a NULL row would be serialized as if it held a real bitmap.
        if (viewer.is_null(row)) {
            builder.append_null();
            continue;
        }

        BitmapValue* bitmap = viewer.value(row);
        const size_t serialize_size = bitmap->getSizeInBytes();
        buf.resize(serialize_size);
        bitmap->write(buf.data());
        builder.append(Slice(buf.data(), serialize_size));
    }

    ColumnPtr col = builder.build(ColumnHelper::is_all_const(columns));

    // Report an error instead of returning a column that exceeded its limit.
    std::string err_msg;
    if (col->capacity_limit_reached(&err_msg)) {
        return Status::InternalError(
                strings::Substitute("Size of binary column generated by bitmap_to_binary reaches limit: $0", err_msg));
    }
    return col;
}

// Rebuilds bitmaps from the binary values stored in columns[0].
//
// @param context  not used by this function.
// @param columns  columns[0] is read through ColumnViewer<TYPE_VARBINARY>.
// @return a TYPE_OBJECT column with one bitmap per input row. A row becomes
//         NULL when the input row is NULL, when the input value is empty, or
//         when BitmapValue::valid_and_deserialize rejects the bytes.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_from_binary(FunctionContext* context, const Columns& columns) {
    ColumnViewer<TYPE_VARBINARY> viewer(columns[0]);
    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_OBJECT> builder(size);

    // size_t index: avoids the signed/unsigned comparison against `size`.
    for (size_t row = 0; row < size; ++row) {
        if (viewer.is_null(row)) {
            builder.append_null();
            continue;
        }

        auto src_value = viewer.value(row);
        // An empty value cannot hold a serialized bitmap; treat it as NULL.
        if (src_value.size == 0) {
            builder.append_null();
            continue;
        }

        BitmapValue bitmap;
        if (bitmap.valid_and_deserialize(src_value.data, src_value.size)) {
            builder.append(std::move(bitmap));
        } else {
            // Malformed input yields NULL rather than failing the whole query.
            builder.append_null();
        }
    }
    return builder.build(ColumnHelper::is_all_const(columns));
}

} // namespace starrocks
trueeyu marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 14 additions & 0 deletions be/src/exprs/bitmap_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,20 @@ class BitmapFunctions {
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_subset_limit);

/**
* @param:
* @paramType columns: [TYPE_OBJECT]
* @return TYPE_VARBINARY
*/
DEFINE_VECTORIZED_FN(bitmap_to_binary);

/**
* @param
* @paramType columns: [TYPE_VARBINARY]
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_from_binary);
};

} // namespace starrocks
trueeyu marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 4 additions & 1 deletion be/src/types/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,10 @@ class BitmapValue {
private:
void _from_bitmap_to_smaller_type();
void _from_set_to_bitmap();
inline void _copy_on_write() {

// The implementation of this function must stay in the .h file;
// otherwise it cannot be inlined, which hurts the performance of BitmapValue::add.
ALWAYS_INLINE void _copy_on_write() {
if (UNLIKELY(_bitmap == nullptr)) {
_bitmap = std::make_shared<detail::Roaring64Map>();
return;
Expand Down
14 changes: 8 additions & 6 deletions gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,18 +426,18 @@
[50231, 'to_days', 'INT', ['DATE'], 'TimeFunctions::to_days'],
[50241, 'date_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::datetime_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],
[50242, 'date_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::date_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],

# From string to DATE/DATETIME
# the function will call by FE getStrToDateFunction, and is invisible to user
[50240, 'str_to_date', 'DATETIME', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str_to_date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],
[50243, 'str2date', 'DATE', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str2date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],

# Joda Time parse & format
[50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'],
'TimeFunctions::parse_jodatime',
'TimeFunctions::parse_joda_prepare',
[50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'],
'TimeFunctions::parse_jodatime',
'TimeFunctions::parse_joda_prepare',
'TimeFunctions::parse_joda_close'],

[50260, 'jodatime_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::jodadatetime_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],
[50261, 'jodatime_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::jodadate_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],

Expand Down Expand Up @@ -630,6 +630,8 @@
[91000, 'sub_bitmap', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::sub_bitmap', False],
[91001, 'bitmap_subset_limit', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_limit', False],
[91002, 'bitmap_subset_in_range', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_in_range', False],
[91003, 'bitmap_to_binary', 'VARBINARY', ['BITMAP'], 'BitmapFunctions::bitmap_to_binary', False],
[91004, 'bitmap_from_binary', 'BITMAP', ['VARBINARY'], 'BitmapFunctions::bitmap_from_binary', False],

# hash function
[100010, 'murmur_hash3_32', 'INT', ['VARCHAR', '...'], 'HashFunctions::murmur_hash3_32'],
Expand Down
128 changes: 128 additions & 0 deletions test/sql/test_bitmap_functions/R/test_bitmap_functions
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,132 @@ None
1
1,2
1,2
-- !result
-- name: test_bitmap_binary
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES ("replication_num" = "1");
-- result:
-- !result
insert into t1 values (1, bitmap_empty());
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 00
-- !result
select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 0
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(1));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0101000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(17179869184));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 030000000004000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 17179869184
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0A0500000001000000000000000200000000000000030000000000000004000000000000000500000000000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 023B3000000100002700010001002700
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20));
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0402000000003A3000000100000000001300100000000100020003000400050006000700080009000A000B000C000D000E000F0010001100120013001400040000003B3000000100006400010000006400
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17179869184,17179869185,17179869186,17179869187,17179869188,17179869189,17179869190,17179869191,17179869192,17179869193,17179869194,17179869195,17179869196,17179869197,17179869198,17179869199,17179869200,17179869201,17179869202,17179869203,17179869204,17179869205,17179869206,17179869207,17179869208,17179869209,17179869210,17179869211,17179869212,17179869213,17179869214,17179869215,17179869216,17179869217,17179869218,17179869219,17179869220,17179869221,17179869222,17179869223,17179869224,17179869225,17179869226,17179869227,17179869228,17179869229,17179869230,17179869231,17179869232,17179869233,17179869234,17179869235,17179869236,17179869237,17179869238,17179869239,17179869240,17179869241,17179869242,17179869243,17179869244,17179869245,17179869246,17179869247,17179869248,17179869249,17179869250,17179869251,17179869252,17179869253,17179869254,17179869255,17179869256,17179869257,17179869258,17179869259,17179869260,17179869261,17179869262,17179869263,17179869264,17179869265,17179869266,17179869267,17179869268,17179869269,17179869270,17179869271,17179869272,17179869273,17179869274,17179869275,17179869276,17179869277,17179869278,17179869279,17179869280,17179869281,17179869282,17179869283,17179869284
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1 order by c1;
-- result:
1 023B3000000100004F00010001004F00
2 023B300000010000D20002000100C70084030A00
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80
2 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,900,901,902,903,904,905,906,907,908,909,910
-- !result
select bitmap_from_binary(to_binary("1234"));
-- result:
None
-- !result
select bitmap_from_binary(to_binary(""));
-- result:
None
-- !result
select bitmap_from_binary(null);
-- result:
None
-- !result
59 changes: 59 additions & 0 deletions test/sql/test_bitmap_functions/T/test_bitmap_functions
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,62 @@ select bitmap_to_string(bitmap_subset_in_range(v1, 3, 2)) from test_bitmap_table
select bitmap_to_string(bitmap_subset_in_range(v1, 2, 3)) from test_bitmap_table1 order by k1;
select bitmap_to_string(bitmap_subset_in_range(v1, -2, 3)) from test_bitmap_table1 order by k1;

-- name: test_bitmap_binary
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES ("replication_num" = "1");

-- empty bitmap
insert into t1 values (1, bitmap_empty());
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- single 32bit bitmap
truncate table t1;
insert into t1 values (1, to_bitmap(1));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- single 64bit bitmap
truncate table t1;
insert into t1 values (1, to_bitmap(17179869184));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- set bitmap
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- RoaringBitmap32
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- RoaringBitmap64
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20));
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
imay marked this conversation as resolved.
Show resolved Hide resolved

-- Buf Resize
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80));
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200));
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910));
select c1, hex(bitmap_to_binary(c2)) from t1 order by c1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1;

-- Invalid format
select bitmap_from_binary(to_binary("1234"));
select bitmap_from_binary(to_binary(""));

-- Input is null
select bitmap_from_binary(null);
Loading