Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add overlay string function #3671

Merged
merged 20 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions e2e_test/batch/functions/overlay.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Basic replacement: FROM is a 1-based position. Without FOR, as many
# characters are replaced as the replacement string is long.
query T
SELECT OVERLAY('abcdef' PLACING '45' FROM 4);
----
abc45f

# The replacement may run past the end of the original string.
query T
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5);
----
yabadaba

# FOR 0 removes nothing, so the replacement is inserted at the position.
query T
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0);
----
yabadabadoo

# FOR may remove more characters than the replacement provides.
query T
SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4);
----
bubba

# The comma-separated call form is expected to be rejected.
statement error
SELECT OVERLAY('abc', 'xyz');

# The FROM clause is mandatory.
statement error
SELECT OVERLAY('abc' PLACING 'xyz');

# FOR without FROM is expected to be rejected.
statement error
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)

# FOR placed before FROM is expected to be rejected.
statement error
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)
1 change: 1 addition & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ message ExprNode {
BOOL_OUT = 228;
OCTET_LENGTH = 229;
BIT_LENGTH = 230;
OVERLAY = 231;

// Boolean comparison
IS_TRUE = 301;
Expand Down
22 changes: 21 additions & 1 deletion src/expr/src/expr/build_expr_from_prost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ use crate::expr::expr_binary_nonnull::{new_binary_expr, new_like_default};
use crate::expr::expr_binary_nullable::new_nullable_binary_expr;
use crate::expr::expr_case::{CaseExpression, WhenClause};
use crate::expr::expr_in::InExpression;
use crate::expr::expr_quaternary_bytes::new_overlay_for_exp;
use crate::expr::expr_ternary_bytes::{
new_replace_expr, new_split_part_expr, new_substr_start_end, new_translate_expr,
new_overlay_exp, new_replace_expr, new_split_part_expr, new_substr_start_end,
new_translate_expr,
};
use crate::expr::expr_unary::{
new_length_default, new_ltrim_expr, new_rtrim_expr, new_trim_expr, new_unary_expr,
Expand Down Expand Up @@ -76,6 +78,24 @@ pub fn build_nullable_binary_expr_prost(prost: &ExprNode) -> Result<BoxedExpress
))
}

/// Builds the `OVERLAY` expression from its protobuf node.
///
/// The node must carry either three children (`s`, `new_sub_str`, `start`) or four
/// (the same three followed by `count`). The three-child form is built with
/// `new_overlay_exp`, the four-child form with `new_overlay_for_exp`.
///
/// # Errors
///
/// Fails when the children or return type cannot be extracted, when the number of
/// children is neither 3 nor 4, or when any child expression fails to build.
pub fn build_overlay_expr(prost: &ExprNode) -> Result<BoxedExpression> {
    let (children, ret_type) = get_children_and_return_type(prost)?;
    ensure!(children.len() == 3 || children.len() == 4);

    // The first three operands are shared by both forms.
    let s = expr_build_from_prost(&children[0])?;
    let new_sub_str = expr_build_from_prost(&children[1])?;
    let start = expr_build_from_prost(&children[2])?;

    let expr = match children.len() {
        3 => new_overlay_exp(s, new_sub_str, start, ret_type),
        4 => {
            let count = expr_build_from_prost(&children[3])?;
            new_overlay_for_exp(s, new_sub_str, start, count, ret_type)
        }
        // Any other arity was rejected by the `ensure!` above.
        _ => unreachable!(),
    };
    Ok(expr)
}

pub fn build_repeat_expr(prost: &ExprNode) -> Result<BoxedExpression> {
let (children, ret_type) = get_children_and_return_type(prost)?;
ensure!(children.len() == 2);
Expand Down
101 changes: 101 additions & 0 deletions src/expr/src/expr/expr_quaternary_bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2022 Singularity Data
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! For expressions that accept 4 arguments + 1 bytes writer as input.

use risingwave_common::array::{I32Array, Utf8Array};
use risingwave_common::types::DataType;

use crate::expr::template::QuaternaryBytesExpression;
use crate::expr::BoxedExpression;
use crate::vector_op::overlay::overlay_for;

/// Creates the four-argument `OVERLAY` expression, i.e. the form with an explicit `count`.
///
/// `s` and `new_sub_str` are read as `Utf8Array` inputs, `start` and `count` as
/// `I32Array` inputs; every row is evaluated with `overlay_for`.
pub fn new_overlay_for_exp(
    s: BoxedExpression,
    new_sub_str: BoxedExpression,
    start: BoxedExpression,
    count: BoxedExpression,
    return_type: DataType,
) -> BoxedExpression {
    let expr = QuaternaryBytesExpression::<Utf8Array, Utf8Array, I32Array, I32Array, _>::new(
        s,
        new_sub_str,
        start,
        count,
        return_type,
        overlay_for,
    );
    Box::new(expr)
}

#[cfg(test)]
mod tests {
    use risingwave_common::array::{DataChunk, Row};
    use risingwave_common::types::{Datum, ScalarImpl};

    use super::*;
    use crate::expr::LiteralExpression;

    /// Wraps a string in a `Varchar` literal expression.
    fn varchar_literal(value: &str) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(value))),
        ))
    }

    /// Wraps an integer in an `Int32` literal expression.
    fn int32_literal(value: i32) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Int32,
            Some(ScalarImpl::from(value)),
        ))
    }

    /// Evaluates `expr` on a dummy one-row chunk and then on an empty row.
    ///
    /// When `is_negative_len` is set both evaluations must fail; otherwise both must
    /// produce `expected`.
    fn test_evals_dummy(expr: BoxedExpression, expected: Datum, is_negative_len: bool) {
        let chunk_result = expr.eval(&DataChunk::new_dummy(1));
        if !is_negative_len {
            assert_eq!(chunk_result.unwrap().to_datum(), expected);
        } else {
            assert!(chunk_result.is_err());
        }

        let row_result = expr.eval_row(&Row::new(vec![]));
        if !is_negative_len {
            assert_eq!(row_result.unwrap(), expected);
        } else {
            assert!(row_result.is_err());
        }
    }

    #[test]
    fn test_overlay() {
        // (source, replacement, start, count, expected result)
        let cases = vec![
            ("aaa", "XY", 1, 0, "XYaaa"),
            ("aaa_aaa", "XYZ", 4, 1, "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, 0, "aaaXYZaaa"),
            ("aaa___aaa", "X", 4, 3, "aaaXaaa"),
            ("aaa", "X", 4, -123, "aaaX"),
            ("aaa_", "X", 4, 123, "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let expr = new_overlay_for_exp(
                varchar_literal(s),
                varchar_literal(new_sub_str),
                int32_literal(start),
                int32_literal(count),
                DataType::Varchar,
            );
            let expected = Some(ScalarImpl::from(String::from(expected)));

            test_evals_dummy(expr, expected, false);
        }
    }
}
49 changes: 49 additions & 0 deletions src/expr/src/expr/expr_ternary_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use risingwave_common::types::DataType;

use crate::expr::template::TernaryBytesExpression;
use crate::expr::BoxedExpression;
use crate::vector_op::overlay::overlay;
use crate::vector_op::replace::replace;
use crate::vector_op::split_part::split_part;
use crate::vector_op::substr::substr_start_for;
Expand Down Expand Up @@ -92,6 +93,23 @@ pub fn new_split_part_expr(
)
}

/// Creates the three-argument `OVERLAY` expression, i.e. the form without a `count`.
///
/// `s` and `new_sub_str` are read as `Utf8Array` inputs and `start` as an `I32Array`
/// input; every row is evaluated with `overlay`.
pub fn new_overlay_exp(
    s: BoxedExpression,
    new_sub_str: BoxedExpression,
    start: BoxedExpression,
    return_type: DataType,
) -> BoxedExpression {
    let expr = TernaryBytesExpression::<Utf8Array, Utf8Array, I32Array, _>::new(
        s,
        new_sub_str,
        start,
        return_type,
        overlay,
    );
    Box::new(expr)
}

#[cfg(test)]
mod tests {
use risingwave_common::array::{DataChunk, Row};
Expand Down Expand Up @@ -208,4 +226,35 @@ mod tests {
test_evals_dummy(expr, Some(ScalarImpl::from(String::from(expected))), false);
}
}

/// Checks the three-argument `OVERLAY` expression against literal inputs.
#[test]
fn test_overlay() {
    // (source, replacement, start, expected result)
    let cases = vec![
        ("aaa__aaa", "XY", 4, "aaaXYaaa"),
        ("aaa", "XY", 3, "aaXY"),
        ("aaa", "XY", 4, "aaaXY"),
        ("aaa", "XY", -123, "XYa"),
        ("aaa", "XY", 123, "aaaXY"),
    ];

    for (s, new_sub_str, start, expected) in cases {
        let source = LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(s))),
        );
        let replacement = LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(new_sub_str))),
        );
        let position = LiteralExpression::new(DataType::Int32, Some(ScalarImpl::from(start)));

        let expr = new_overlay_exp(
            Box::new(source),
            Box::new(replacement),
            Box::new(position),
            DataType::Varchar,
        );
        let expected = Some(ScalarImpl::from(String::from(expected)));

        test_evals_dummy(expr, expected, false);
    }
}
}
2 changes: 2 additions & 0 deletions src/expr/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ mod expr_input_ref;
mod expr_is_null;
mod expr_literal;
mod expr_nested_construct;
mod expr_quaternary_bytes;
mod expr_ternary_bytes;
pub mod expr_unary;
mod template;
Expand Down Expand Up @@ -102,6 +103,7 @@ pub fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression> {
Substr => build_substr_expr(prost),
Length => build_length_expr(prost),
Replace => build_replace_expr(prost),
Overlay => build_overlay_expr(prost),
Like => build_like_expr(prost),
Trim => build_trim_expr(prost),
Ltrim => build_ltrim_expr(prost),
Expand Down
1 change: 1 addition & 0 deletions src/expr/src/expr/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ gen_expr_normal!(TernaryExpression, { IA1, IA2, IA3 }, { 'ia1, 'ia2, 'ia3 });
gen_expr_bytes!(UnaryBytesExpression, { IA1 }, { 'ia1 });
gen_expr_bytes!(BinaryBytesExpression, { IA1, IA2 }, { 'ia1, 'ia2 });
gen_expr_bytes!(TernaryBytesExpression, { IA1, IA2, IA3 }, { 'ia1, 'ia2, 'ia3 });
gen_expr_bytes!(QuaternaryBytesExpression, { IA1, IA2, IA3, IA4 }, { 'ia1, 'ia2, 'ia3, 'ia4 });

gen_expr_nullable!(UnaryNullableExpression, { IA1 }, { 'ia1 });
gen_expr_nullable!(BinaryNullableExpression, { IA1, IA2 }, { 'ia1, 'ia2 });
Expand Down
1 change: 1 addition & 0 deletions src/expr/src/vector_op/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub mod like;
pub mod lower;
pub mod ltrim;
pub mod md5;
pub mod overlay;
pub mod position;
pub mod repeat;
pub mod replace;
Expand Down
92 changes: 92 additions & 0 deletions src/expr/src/vector_op/overlay.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2022 Singularity Data
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use risingwave_common::array::{BytesGuard, BytesWriter};

use crate::Result;

/// Replaces a part of `s` with `new_sub_str`, beginning at the 1-based position `start`.
///
/// This is the `OVERLAY(s PLACING new_sub_str FROM start)` form without `FOR`: the
/// number of replaced characters defaults to the number of characters in
/// `new_sub_str`. The work is delegated to [`overlay_for`].
///
/// # Errors
///
/// Returns an error if writing to `writer` fails.
#[inline(always)]
pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: BytesWriter) -> Result<BytesGuard> {
    // If count is omitted, it defaults to the length of new_sub_str. That length is
    // counted in characters, not bytes: `str::len` would over-count for multi-byte
    // UTF-8 replacements and remove too much of `s`. Clamp instead of wrapping if the
    // character count does not fit in an `i32`.
    let count = new_sub_str.chars().count().min(i32::MAX as usize) as i32;
    overlay_for(s, new_sub_str, start, count, writer)
}

/// Replaces `count` characters of `s` with `new_sub_str`, beginning at the 1-based
/// character position `start`.
///
/// Out-of-range arguments are clamped rather than rejected:
/// * a negative `count` is treated as `0`, so `new_sub_str` is inserted and nothing
///   is removed;
/// * a `start` below `1` is treated as `1`;
/// * a `start` beyond the end of `s` appends `new_sub_str` to `s`;
/// * a `count` reaching past the end of `s` removes everything after `start`.
///
/// `start` and `count` are measured in characters, so multi-byte UTF-8 input is
/// never split in the middle of a character.
///
/// # Errors
///
/// Returns an error if writing to `writer` fails.
#[inline(always)]
pub fn overlay_for(
    s: &str,
    new_sub_str: &str,
    start: i32,
    count: i32,
    writer: BytesWriter,
) -> Result<BytesGuard> {
    // A negative count replaces nothing.
    let count = count.max(0) as usize;

    // Note that indices are 1-based. `saturating_sub` keeps `i32::MIN` from
    // overflowing before the value is clamped to zero.
    let start = start.saturating_sub(1).max(0) as usize;

    // Byte offset of the first replaced character.
    // If start is out of range, attach it to the end.
    let prefix_end = s.char_indices().nth(start).map_or(s.len(), |(idx, _)| idx);

    // Byte offset of the first character kept after the replaced span, or the end
    // of `s` when the span runs past it.
    let suffix_start = s[prefix_end..]
        .char_indices()
        .nth(count)
        .map_or(s.len(), |(idx, _)| prefix_end + idx);

    let mut writer = writer.begin();
    writer.write_ref(&s[..prefix_end])?;
    writer.write_ref(new_sub_str)?;

    if suffix_start < s.len() {
        writer.write_ref(&s[suffix_start..])?;
    }

    writer.finish().map_err(Into::into)
}

#[cfg(test)]
mod tests {
    use risingwave_common::array::{Array, ArrayBuilder, Utf8ArrayBuilder};

    use super::*;

    /// Runs `overlay` when `count` is `None`, otherwise `overlay_for`, and returns
    /// the single string written to a fresh builder.
    fn run_overlay(s: &str, new_sub_str: &str, start: i32, count: Option<i32>) -> String {
        let builder = Utf8ArrayBuilder::new(1);
        let writer = builder.writer();
        let result = if let Some(count) = count {
            overlay_for(s, new_sub_str, start, count, writer)
        } else {
            overlay(s, new_sub_str, start, writer)
        };
        let array = result.unwrap().into_inner().finish().unwrap();
        array.value_at(0).unwrap().to_string()
    }

    #[test]
    fn test_overlay() {
        // (source, replacement, start, optional count, expected result)
        let cases = vec![
            ("aaa__aaa", "XY", 4, None, "aaaXYaaa"),
            // Place at end.
            ("aaa", "XY", 4, None, "aaaXY"),
            // Place at start.
            ("aaa", "XY", 1, Some(0), "XYaaa"),
            // Replace shorter string.
            ("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa"),
            // Replace longer string.
            ("aaa___aaa", "X", 4, Some(3), "aaaXaaa"),
            // start too small or large.
            ("aaa", "XY", -123, None, "XYa"),
            ("aaa", "XY", 123, None, "aaaXY"),
            // count too small or large.
            ("aaa", "X", 4, Some(-123), "aaaX"),
            ("aaa_", "X", 4, Some(123), "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let actual = run_overlay(s, new_sub_str, start, count);
            assert_eq!(actual, expected);
        }
    }
}
1 change: 1 addition & 0 deletions src/frontend/src/binder/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ impl Binder {
"lower" => ExprType::Lower,
"trim" => ExprType::Trim,
"replace" => ExprType::Replace,
"overlay" => ExprType::Overlay,
"position" => ExprType::Position,
"ltrim" => ExprType::Ltrim,
"rtrim" => ExprType::Rtrim,
Expand Down
Loading