-
Notifications
You must be signed in to change notification settings - Fork 600
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add overlay string function (#3671)
* Added (empty) file for overlay function. * Added empty overlay function. * Added test function. * Added second function to avoid optional parameter. * Made functions public. * Added overlay reference to 'expr_ternary_bytes.rs'. * Added test for new_overlay_exp. * Added overlay reference in 'build_expr_from_prost.rs'. * Added Overlay as type of ExprNode. * Added overlay to parser. * Added test for overlay to parser. * Removed redundant parentheses in if-statement. * Added overlay to binder. * Properly embedded overlay with 4 + 1 arguments. * Some minor formatting fixes. * Added e2e tests. * Removed obsolete ToDo comment. * Fixed sqlsmith code to generate proper code for overlay functions. Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
- Loading branch information
1 parent
4952c5c
commit d0739a1
Showing
17 changed files
with
438 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
query T | ||
SELECT OVERLAY('abcdef' PLACING '45' FROM 4); | ||
---- | ||
abc45f | ||
|
||
query T | ||
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5); | ||
---- | ||
yabadaba | ||
|
||
query T | ||
SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0); | ||
---- | ||
yabadabadoo | ||
|
||
query T | ||
SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4); | ||
---- | ||
bubba | ||
|
||
statement error | ||
SELECT OVERLAY('abc', 'xyz'); | ||
|
||
statement error | ||
SELECT OVERLAY('abc' PLACING 'xyz'); | ||
|
||
statement error | ||
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2) | ||
|
||
statement error | ||
SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// Copyright 2022 Singularity Data | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
//! For expressions that accept 4 arguments + 1 bytes writer as input. | ||
use risingwave_common::array::{I32Array, Utf8Array}; | ||
use risingwave_common::types::DataType; | ||
|
||
use crate::expr::template::QuaternaryBytesExpression; | ||
use crate::expr::BoxedExpression; | ||
use crate::vector_op::overlay::overlay_for; | ||
|
||
pub fn new_overlay_for_exp( | ||
s: BoxedExpression, | ||
new_sub_str: BoxedExpression, | ||
start: BoxedExpression, | ||
count: BoxedExpression, | ||
return_type: DataType, | ||
) -> BoxedExpression { | ||
Box::new(QuaternaryBytesExpression::< | ||
Utf8Array, | ||
Utf8Array, | ||
I32Array, | ||
I32Array, | ||
_, | ||
>::new( | ||
s, new_sub_str, start, count, return_type, overlay_for | ||
)) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use risingwave_common::array::{DataChunk, Row};
    use risingwave_common::types::{Datum, ScalarImpl};

    use super::*;
    use crate::expr::LiteralExpression;

    /// Wraps a string in a `Varchar` literal expression.
    fn varchar_literal(value: &str) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Varchar,
            Some(ScalarImpl::from(String::from(value))),
        ))
    }

    /// Wraps an integer in an `Int32` literal expression.
    fn int32_literal(value: i32) -> BoxedExpression {
        Box::new(LiteralExpression::new(
            DataType::Int32,
            Some(ScalarImpl::from(value)),
        ))
    }

    /// Evaluates `expr` on a one-row dummy chunk and on an empty row.
    ///
    /// When `is_negative_len` is set both evaluations must fail; otherwise both
    /// must yield `expected`.
    fn test_evals_dummy(expr: BoxedExpression, expected: Datum, is_negative_len: bool) {
        let chunk_result = expr.eval(&DataChunk::new_dummy(1));
        let row_result = expr.eval_row(&Row::new(vec![]));

        if is_negative_len {
            assert!(chunk_result.is_err());
            assert!(row_result.is_err());
            return;
        }

        assert_eq!(chunk_result.unwrap().to_datum(), expected);
        assert_eq!(row_result.unwrap(), expected);
    }

    #[test]
    fn test_overlay() {
        // (s, new_sub_str, start, count, expected)
        let cases = [
            ("aaa", "XY", 1, 0, "XYaaa"),
            ("aaa_aaa", "XYZ", 4, 1, "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, 0, "aaaXYZaaa"),
            ("aaa___aaa", "X", 4, 3, "aaaXaaa"),
            ("aaa", "X", 4, -123, "aaaX"),
            ("aaa_", "X", 4, 123, "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let expr = new_overlay_for_exp(
                varchar_literal(s),
                varchar_literal(new_sub_str),
                int32_literal(start),
                int32_literal(count),
                DataType::Varchar,
            );
            let expected_datum = Some(ScalarImpl::from(String::from(expected)));

            test_evals_dummy(expr, expected_datum, false);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// Copyright 2022 Singularity Data | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use risingwave_common::array::{BytesGuard, BytesWriter}; | ||
|
||
use crate::Result; | ||
|
||
#[inline(always)] | ||
pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: BytesWriter) -> Result<BytesGuard> { | ||
// If count is omitted, it defaults to the length of new_sub_str. | ||
overlay_for(s, new_sub_str, start, new_sub_str.len() as i32, writer) | ||
} | ||
|
||
#[inline(always)] | ||
pub fn overlay_for( | ||
s: &str, | ||
new_sub_str: &str, | ||
start: i32, | ||
count: i32, | ||
writer: BytesWriter, | ||
) -> Result<BytesGuard> { | ||
let count = count.max(0) as usize; | ||
|
||
// If start is out of range, attach it to the end. | ||
// Note that indices are 1-based. | ||
let start = ((start - 1).max(0) as usize).min(s.len()); | ||
|
||
let remaining = start + count; | ||
|
||
let mut writer = writer.begin(); | ||
writer.write_ref(&s[..start])?; | ||
writer.write_ref(new_sub_str)?; | ||
|
||
if remaining < s.len() { | ||
writer.write_ref(&s[remaining..])?; | ||
} | ||
|
||
writer.finish().map_err(Into::into) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use risingwave_common::array::{Array, ArrayBuilder, Utf8ArrayBuilder};

    use super::*;

    /// Checks `overlay` (count omitted) and `overlay_for` (explicit count)
    /// against a table of inputs and expected outputs.
    #[test]
    fn test_overlay() {
        // (s, new_sub_str, start, count, expected); `None` means count is omitted.
        let cases = [
            ("aaa__aaa", "XY", 4, None, "aaaXYaaa"),
            // Place at end.
            ("aaa", "XY", 4, None, "aaaXY"),
            // Place at start.
            ("aaa", "XY", 1, Some(0), "XYaaa"),
            // Replace shorter string.
            ("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa"),
            // Replace longer string.
            ("aaa___aaa", "X", 4, Some(3), "aaaXaaa"),
            // start too small or large.
            ("aaa", "XY", -123, None, "XYa"),
            ("aaa", "XY", 123, None, "aaaXY"),
            // count too small or large.
            ("aaa", "X", 4, Some(-123), "aaaX"),
            ("aaa_", "X", 4, Some(123), "aaaX"),
        ];

        for (s, new_sub_str, start, count, expected) in cases {
            let writer = Utf8ArrayBuilder::new(1).writer();

            let result = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, writer)
            } else {
                overlay(s, new_sub_str, start, writer)
            };

            let array = result.unwrap().into_inner().finish().unwrap();
            assert_eq!(array.value_at(0).unwrap(), expected);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.