Skip to content

Commit

Permalink
Optimization: concat function (#9732)
Browse files Browse the repository at this point in the history
* optimization: concat function

fix: concat_ws

chore: add license header

add arrow feature

update concat

* change Cargo.toml

* pass cargo clippy

* chore: add annotation
  • Loading branch information
JasonLi-cn authored Apr 4, 2024
1 parent 4bd7c13 commit 24fc99c
Show file tree
Hide file tree
Showing 4 changed files with 390 additions and 79 deletions.
5 changes: 5 additions & 0 deletions datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ regex = { version = "1.8", optional = true }
sha2 = { version = "^0.10.1", optional = true }

[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
criterion = "0.5"
rand = { workspace = true }
rstest = { workspace = true }
Expand All @@ -81,3 +82,7 @@ tokio = { workspace = true, features = ["rt-multi-thread"] }
[[bench]]
harness = false
name = "in_list"

[[bench]]
harness = false
name = "concat"
47 changes: 47 additions & 0 deletions datafusion/physical-expr/benches/concat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::util::bench_util::create_string_array_with_len;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr::string_expressions::concat;
use std::sync::Arc;

/// Builds the three-element argument list passed to `concat` in this benchmark:
/// a generated string array, the scalar literal `", "`, and the same string
/// array again.
///
/// `size` and `str_len` are forwarded to `create_string_array_with_len`.
fn create_args(size: usize, str_len: usize) -> Vec<ColumnarValue> {
    // NOTE(review): 0.2 is presumably the null density of the generated
    // array — confirm against arrow's `bench_util`.
    let strings = Arc::new(create_string_array_with_len::<i32>(size, 0.2, str_len));
    let separator = ColumnarValue::Scalar(ScalarValue::Utf8(Some(", ".to_string())));

    // The same underlying array is used for both array arguments; only the
    // `Arc` handle is cloned.
    let left = ColumnarValue::Array(strings.clone());
    let right = ColumnarValue::Array(strings);

    vec![left, separator, right]
}

/// Benchmarks `concat` on inputs of 1024, 4096 and 8192 rows, each built by
/// `create_args` with a string length argument of 32.
///
/// Every size is registered as `concat/<size>` inside the
/// "concat function" benchmark group.
fn criterion_benchmark(c: &mut Criterion) {
    // Open the group once and finish it once, so all input sizes are recorded
    // in the same group instead of re-opening an identically named group for
    // every size.
    let mut group = c.benchmark_group("concat function");
    for size in [1024, 4096, 8192] {
        // Arguments are built outside `b.iter`, so only the `concat` call
        // (and the unwrap of its result) sits inside the measured closure.
        let args = create_args(size, 32);
        group.bench_function(BenchmarkId::new("concat", size), |b| {
            // `black_box` keeps the optimizer from discarding the result.
            b.iter(|| std::hint::black_box(concat(&args).unwrap()))
        });
    }
    group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
6 changes: 3 additions & 3 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ pub fn create_physical_fun(
// string functions
BuiltinScalarFunction::Coalesce => Arc::new(conditional_expressions::coalesce),
BuiltinScalarFunction::Concat => Arc::new(string_expressions::concat),
BuiltinScalarFunction::ConcatWithSeparator => Arc::new(|args| {
make_scalar_function_inner(string_expressions::concat_ws)(args)
}),
BuiltinScalarFunction::ConcatWithSeparator => {
Arc::new(string_expressions::concat_ws)
}
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)
Expand Down
Loading

0 comments on commit 24fc99c

Please sign in to comment.