Skip to content

Commit

Permalink
feat: add overlay string function (#3671)
Browse files Browse the repository at this point in the history
* Added (empty) file for overlay function.

* Added empty overlay function.

* Added test function.

* Added second function to avoid optional parameter.

* Made functions public.

* Added overlay reference to 'expr_ternary_bytes.rs'.

* Added test for new_overlay_exp.

* Added overlay reference in 'build_expr_from_prost.rs'.

* Added Overlay as type of ExprNode.

* Added overlay to parser.

* Added test for overlay to parser.

* Removed redundant parentheses in if-statement.

* Added overlay to binder.

* Properly embedded overlay with 4 + 1 arguments.

* Some minor formatting fixes.

* Added e2e tests.

* Removed obsolete ToDo comment.

* Fixed sqlsmith code to generate proper code for overlay functions.

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
ALeitert and mergify[bot] authored Jul 6, 2022
1 parent 4952c5c commit d0739a1
Show file tree
Hide file tree
Showing 17 changed files with 438 additions and 1 deletion.
31 changes: 31 additions & 0 deletions e2e_test/batch/functions/overlay.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# OVERLAY without FOR: the number of replaced characters defaults to the length of the placed string.
query T
SELECT OVERLAY('abcdef' PLACING '45' FROM 4);
----
abc45f

# The placed string may run past the end of the original string.
query T
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5);
----
yabadaba

# FOR 0 replaces nothing, so the placed string is inserted at the FROM position.
query T
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0);
----
yabadabadoo

# FOR may replace more characters than the placed string contains.
query T
SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4);
----
bubba

# The plain function-call form with comma-separated arguments is rejected.
statement error
SELECT OVERLAY('abc', 'xyz');

# FROM is mandatory.
statement error
SELECT OVERLAY('abc' PLACING 'xyz');

# FOR cannot be used without FROM.
statement error
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)

# FOR must come after FROM, not before it.
statement error
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)
1 change: 1 addition & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ message ExprNode {
BOOL_OUT = 228;
OCTET_LENGTH = 229;
BIT_LENGTH = 230;
OVERLAY = 231;

// Boolean comparison
IS_TRUE = 301;
Expand Down
22 changes: 21 additions & 1 deletion src/expr/src/expr/build_expr_from_prost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ use crate::expr::expr_binary_nonnull::{new_binary_expr, new_like_default};
use crate::expr::expr_binary_nullable::new_nullable_binary_expr;
use crate::expr::expr_case::{CaseExpression, WhenClause};
use crate::expr::expr_in::InExpression;
use crate::expr::expr_quaternary_bytes::new_overlay_for_exp;
use crate::expr::expr_ternary_bytes::{
new_replace_expr, new_split_part_expr, new_substr_start_end, new_translate_expr,
new_overlay_exp, new_replace_expr, new_split_part_expr, new_substr_start_end,
new_translate_expr,
};
use crate::expr::expr_unary::{
new_length_default, new_ltrim_expr, new_rtrim_expr, new_trim_expr, new_unary_expr,
Expand Down Expand Up @@ -76,6 +78,24 @@ pub fn build_nullable_binary_expr_prost(prost: &ExprNode) -> Result<BoxedExpress
))
}

/// Builds an `OVERLAY` expression from its protobuf representation.
///
/// The node must carry either three children (`s`, `new_sub_str`, `start`) or four (the same
/// three followed by `count`); any other arity fails the `ensure!` check. The three-argument
/// form is built with `new_overlay_exp`, the four-argument form with `new_overlay_for_exp`.
pub fn build_overlay_expr(prost: &ExprNode) -> Result<BoxedExpression> {
    let (children, ret_type) = get_children_and_return_type(prost)?;
    ensure!(children.len() == 3 || children.len() == 4);

    let s = expr_build_from_prost(&children[0])?;
    let new_sub_str = expr_build_from_prost(&children[1])?;
    let start = expr_build_from_prost(&children[2])?;

    // After the arity check only two shapes remain: `count` is present or it is not.
    let expr = match children.get(3) {
        Some(count_node) => {
            let count = expr_build_from_prost(count_node)?;
            new_overlay_for_exp(s, new_sub_str, start, count, ret_type)
        }
        None => new_overlay_exp(s, new_sub_str, start, ret_type),
    };
    Ok(expr)
}

pub fn build_repeat_expr(prost: &ExprNode) -> Result<BoxedExpression> {
let (children, ret_type) = get_children_and_return_type(prost)?;
ensure!(children.len() == 2);
Expand Down
101 changes: 101 additions & 0 deletions src/expr/src/expr/expr_quaternary_bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2022 Singularity Data
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! For expressions that accept 4 arguments + 1 bytes writer as input.
use risingwave_common::array::{I32Array, Utf8Array};
use risingwave_common::types::DataType;

use crate::expr::template::QuaternaryBytesExpression;
use crate::expr::BoxedExpression;
use crate::vector_op::overlay::overlay_for;

/// Creates the four-argument `OVERLAY ... FROM ... FOR ...` expression.
///
/// `s` and `new_sub_str` are evaluated as `Utf8Array`s, `start` and `count` as `I32Array`s;
/// each row is computed by `overlay_for` and the result is boxed as a [`BoxedExpression`].
pub fn new_overlay_for_exp(
    s: BoxedExpression,
    new_sub_str: BoxedExpression,
    start: BoxedExpression,
    count: BoxedExpression,
    return_type: DataType,
) -> BoxedExpression {
    let expr = QuaternaryBytesExpression::<Utf8Array, Utf8Array, I32Array, I32Array, _>::new(
        s,
        new_sub_str,
        start,
        count,
        return_type,
        overlay_for,
    );
    Box::new(expr)
}

#[cfg(test)]
mod tests {
    use risingwave_common::array::{DataChunk, Row};
    use risingwave_common::types::{Datum, ScalarImpl};

    use super::*;
    use crate::expr::LiteralExpression;

    /// Wraps `value` in a `Varchar` literal expression.
    fn varchar_literal(value: &str) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(value))),
        ))
    }

    /// Wraps `value` in an `Int32` literal expression.
    fn int32_literal(value: i32) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Int32,
            Some(ScalarImpl::from(value)),
        ))
    }

    /// Evaluates `expr` on a one-row dummy chunk and then on an empty row.
    ///
    /// When `is_negative_len` is set both evaluations must fail; otherwise both must
    /// produce `expected`.
    fn test_evals_dummy(expr: BoxedExpression, expected: Datum, is_negative_len: bool) {
        if is_negative_len {
            assert!(expr.eval(&DataChunk::new_dummy(1)).is_err());
            assert!(expr.eval_row(&Row::new(vec![])).is_err());
            return;
        }

        let chunk_result = expr.eval(&DataChunk::new_dummy(1));
        assert_eq!(chunk_result.unwrap().to_datum(), expected);

        let row_result = expr.eval_row(&Row::new(vec![]));
        assert_eq!(row_result.unwrap(), expected);
    }

    #[test]
    fn test_overlay() {
        // (s, new_sub_str, start, count, expected)
        let cases = vec![
            ("aaa", "XY", 1, 0, "XYaaa"),
            ("aaa_aaa", "XYZ", 4, 1, "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, 0, "aaaXYZaaa"),
            ("aaa___aaa", "X", 4, 3, "aaaXaaa"),
            ("aaa", "X", 4, -123, "aaaX"),
            ("aaa_", "X", 4, 123, "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let expr = new_overlay_for_exp(
                varchar_literal(s),
                varchar_literal(new_sub_str),
                int32_literal(start),
                int32_literal(count),
                DataType::Varchar,
            );
            let expected = Some(ScalarImpl::from(String::from(expected)));

            test_evals_dummy(expr, expected, false);
        }
    }
}
49 changes: 49 additions & 0 deletions src/expr/src/expr/expr_ternary_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use risingwave_common::types::DataType;

use crate::expr::template::TernaryBytesExpression;
use crate::expr::BoxedExpression;
use crate::vector_op::overlay::overlay;
use crate::vector_op::replace::replace;
use crate::vector_op::split_part::split_part;
use crate::vector_op::substr::substr_start_for;
Expand Down Expand Up @@ -92,6 +93,23 @@ pub fn new_split_part_expr(
)
}

/// Creates the three-argument `OVERLAY ... FROM ...` expression (no explicit `FOR` count).
///
/// `s` and `new_sub_str` are evaluated as `Utf8Array`s and `start` as an `I32Array`; each row
/// is computed by `overlay` and the result is boxed as a [`BoxedExpression`].
pub fn new_overlay_exp(
    s: BoxedExpression,
    new_sub_str: BoxedExpression,
    start: BoxedExpression,
    return_type: DataType,
) -> BoxedExpression {
    let expr = TernaryBytesExpression::<Utf8Array, Utf8Array, I32Array, _>::new(
        s,
        new_sub_str,
        start,
        return_type,
        overlay,
    );
    Box::new(expr)
}

#[cfg(test)]
mod tests {
use risingwave_common::array::{DataChunk, Row};
Expand Down Expand Up @@ -208,4 +226,35 @@ mod tests {
test_evals_dummy(expr, Some(ScalarImpl::from(String::from(expected))), false);
}
}

#[test]
fn test_overlay() {
    // (s, new_sub_str, start, expected)
    let cases = vec![
        ("aaa__aaa", "XY", 4, "aaaXYaaa"),
        ("aaa", "XY", 3, "aaXY"),
        ("aaa", "XY", 4, "aaaXY"),
        ("aaa", "XY", -123, "XYa"),
        ("aaa", "XY", 123, "aaaXY"),
    ];

    for (s, new_sub_str, start, expected) in cases {
        // Build one literal operand per argument, then the expression under test.
        let s_literal = LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(s))),
        );
        let new_sub_str_literal = LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(new_sub_str))),
        );
        let start_literal = LiteralExpression::new(DataType::Int32, Some(ScalarImpl::from(start)));

        let expr = new_overlay_exp(
            Box::new(s_literal),
            Box::new(new_sub_str_literal),
            Box::new(start_literal),
            DataType::Varchar,
        );
        let expected = Some(ScalarImpl::from(String::from(expected)));

        test_evals_dummy(expr, expected, false);
    }
}
}
2 changes: 2 additions & 0 deletions src/expr/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ mod expr_input_ref;
mod expr_is_null;
mod expr_literal;
mod expr_nested_construct;
mod expr_quaternary_bytes;
mod expr_ternary_bytes;
pub mod expr_unary;
mod template;
Expand Down Expand Up @@ -102,6 +103,7 @@ pub fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression> {
Substr => build_substr_expr(prost),
Length => build_length_expr(prost),
Replace => build_replace_expr(prost),
Overlay => build_overlay_expr(prost),
Like => build_like_expr(prost),
Trim => build_trim_expr(prost),
Ltrim => build_ltrim_expr(prost),
Expand Down
1 change: 1 addition & 0 deletions src/expr/src/expr/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ gen_expr_normal!(TernaryExpression, { IA1, IA2, IA3 }, { 'ia1, 'ia2, 'ia3 });
gen_expr_bytes!(UnaryBytesExpression, { IA1 }, { 'ia1 });
gen_expr_bytes!(BinaryBytesExpression, { IA1, IA2 }, { 'ia1, 'ia2 });
gen_expr_bytes!(TernaryBytesExpression, { IA1, IA2, IA3 }, { 'ia1, 'ia2, 'ia3 });
gen_expr_bytes!(QuaternaryBytesExpression, { IA1, IA2, IA3, IA4 }, { 'ia1, 'ia2, 'ia3, 'ia4 });

gen_expr_nullable!(UnaryNullableExpression, { IA1 }, { 'ia1 });
gen_expr_nullable!(BinaryNullableExpression, { IA1, IA2 }, { 'ia1, 'ia2 });
Expand Down
1 change: 1 addition & 0 deletions src/expr/src/vector_op/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub mod like;
pub mod lower;
pub mod ltrim;
pub mod md5;
pub mod overlay;
pub mod position;
pub mod repeat;
pub mod replace;
Expand Down
92 changes: 92 additions & 0 deletions src/expr/src/vector_op/overlay.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2022 Singularity Data
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use risingwave_common::array::{BytesGuard, BytesWriter};

use crate::Result;

/// Replaces part of `s` with `new_sub_str`, starting at the 1-based character position `start`.
///
/// This is the `OVERLAY(s PLACING new_sub_str FROM start)` form: the number of replaced
/// characters defaults to the length of `new_sub_str`, measured in characters (not bytes).
/// See [`overlay_for`] for how out-of-range positions are handled.
#[inline(always)]
pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: BytesWriter) -> Result<BytesGuard> {
    // If count is omitted, it defaults to the length of new_sub_str. The length is counted in
    // characters so that multi-byte replacements do not swallow extra characters of `s`; it is
    // clamped to `i32::MAX` because `overlay_for` takes an `i32`.
    let count = new_sub_str.chars().count().min(i32::MAX as usize) as i32;
    overlay_for(s, new_sub_str, start, count, writer)
}

/// Replaces `count` characters of `s`, starting at the 1-based character position `start`,
/// with `new_sub_str`.
///
/// This is the `OVERLAY(s PLACING new_sub_str FROM start FOR count)` form. Positions and
/// lengths are measured in characters, so multi-byte UTF-8 input is never split in the middle
/// of a character.
///
/// * A `start` below 1 is treated as 1; a `start` past the end appends `new_sub_str` to `s`.
/// * A negative `count` is treated as 0 (pure insertion); a `count` reaching past the end of
///   `s` replaces everything from `start` onwards.
///
/// # Errors
///
/// Propagates any error reported by the bytes writer.
#[inline(always)]
pub fn overlay_for(
    s: &str,
    new_sub_str: &str,
    start: i32,
    count: i32,
    writer: BytesWriter,
) -> Result<BytesGuard> {
    let count = count.max(0) as usize;

    // Number of leading characters of `s` that are kept. Indices are 1-based, and the
    // saturating subtraction keeps `i32::MIN` from overflowing.
    let kept_chars = start.saturating_sub(1).max(0) as usize;

    // Translate the character positions into byte offsets that are guaranteed to lie on
    // character boundaries. If start is out of range, the prefix is the whole string.
    let prefix_end = char_offset(s, kept_chars);
    let suffix_start = prefix_end + char_offset(&s[prefix_end..], count);

    let mut writer = writer.begin();
    writer.write_ref(&s[..prefix_end])?;
    writer.write_ref(new_sub_str)?;

    if suffix_start < s.len() {
        writer.write_ref(&s[suffix_start..])?;
    }

    writer.finish().map_err(Into::into)
}

/// Returns the byte offset at which the `n`-th character (0-based) of `s` starts, or `s.len()`
/// when `s` has `n` or fewer characters.
fn char_offset(s: &str, n: usize) -> usize {
    s.char_indices()
        .nth(n)
        .map_or(s.len(), |(offset, _)| offset)
}

#[cfg(test)]
mod tests {
    use risingwave_common::array::{Array, ArrayBuilder, Utf8ArrayBuilder};

    use super::*;

    /// Checks `overlay` (cases with `None` as count) and `overlay_for` (cases with
    /// `Some(count)`) against hand-computed results.
    #[test]
    fn test_overlay() {
        // (s, new_sub_str, start, count, expected)
        let cases = vec![
            ("aaa__aaa", "XY", 4, None, "aaaXYaaa"),
            // Place at end.
            ("aaa", "XY", 4, None, "aaaXY"),
            // Place at start.
            ("aaa", "XY", 1, Some(0), "XYaaa"),
            // Replace shorter string.
            ("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa"),
            // Replace longer string.
            ("aaa___aaa", "X", 4, Some(3), "aaaXaaa"),
            // start too small or large.
            ("aaa", "XY", -123, None, "XYa"),
            ("aaa", "XY", 123, None, "aaaXY"),
            // count too small or large.
            ("aaa", "X", 4, Some(-123), "aaaX"),
            ("aaa_", "X", 4, Some(123), "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let builder = Utf8ArrayBuilder::new(1);
            let writer = builder.writer();

            // A missing count exercises the three-argument entry point.
            let result = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, writer)
            } else {
                overlay(s, new_sub_str, start, writer)
            };

            let array = result.unwrap().into_inner().finish().unwrap();
            assert_eq!(array.value_at(0).unwrap(), expected);
        }
    }
}
1 change: 1 addition & 0 deletions src/frontend/src/binder/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ impl Binder {
"lower" => ExprType::Lower,
"trim" => ExprType::Trim,
"replace" => ExprType::Replace,
"overlay" => ExprType::Overlay,
"position" => ExprType::Position,
"ltrim" => ExprType::Ltrim,
"rtrim" => ExprType::Rtrim,
Expand Down
Loading

0 comments on commit d0739a1

Please sign in to comment.