Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: ScalarVisitor refactor push_down_prewhere #15215

Merged
merged 3 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/query/ast/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
.iter()
.any(|field_name| !field_name.chars().all(|c| c.is_ascii_alphanumeric()))
{
return Err(nom::Err::Error(ErrorKind::Other(
return Err(nom::Err::Failure(ErrorKind::Other(
"Invalid tuple field name, only support alphanumeric characters",
)));
}
Expand Down
9 changes: 5 additions & 4 deletions src/query/ast/tests/it/testdata/statement-error.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,14 @@ error:
create table a (b tuple("c-1" int, "c-2" uint64));
---------- Output ---------
error:
--> SQL:1:48
--> SQL:1:19
|
1 | create table a (b tuple("c-1" int, "c-2" uint64));
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `NULLABLE`, `(`, `NULL`, `NOT`, or `,`
| | | |
| | | while parsing TUPLE(<type>, ...)
| ------ - ^^^^^
| | | |
| | | Invalid tuple field name, only support alphanumeric characters
| | | while parsing type name
| | | while parsing TUPLE(<name> <type>, ...)
| | while parsing `<column name> <type> [DEFAULT <expr>] [AS (<expr>) VIRTUAL] [AS (<expr>) STORED] [COMMENT '<comment>']`
| while parsing `CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [<database>.]<table> [<source>] [<table_options>]`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ use crate::optimizer::rule::Rule;
use crate::optimizer::ColumnSet;
use crate::optimizer::RuleID;
use crate::optimizer::SExpr;
use crate::plans::BoundColumnRef;
use crate::plans::Filter;
use crate::plans::Prewhere;
use crate::plans::RelOp;
use crate::plans::ScalarExpr;
use crate::plans::Scan;
use crate::plans::SubqueryExpr;
use crate::plans::Visitor;
use crate::IndexType;
use crate::MetadataRef;
use crate::Visibility;
Expand Down Expand Up @@ -58,44 +61,47 @@ impl RulePushDownPrewhere {
table_index: IndexType,
schema: &TableSchemaRef,
expr: &ScalarExpr,
columns: &mut ColumnSet,
) -> Result<()> {
match expr {
ScalarExpr::BoundColumnRef(column) => {
) -> Result<ColumnSet> {
struct ColumnVisitor {
table_index: IndexType,
schema: TableSchemaRef,
columns: ColumnSet,
}
impl<'a> Visitor<'a> for ColumnVisitor {
fn visit_bound_column_ref(&mut self, column: &'a BoundColumnRef) -> Result<()> {
if let Some(index) = &column.column.table_index {
if table_index == *index
if self.table_index == *index
&& (column.column.visibility == Visibility::InVisible
|| schema.index_of(column.column.column_name.as_str()).is_ok())
|| self
.schema
.index_of(column.column.column_name.as_str())
.is_ok())
{
columns.insert(column.column.index);
self.columns.insert(column.column.index);
return Ok(());
}
}
return Err(ErrorCode::Unimplemented("Column is not in the table"));
}
ScalarExpr::FunctionCall(func) => {
for arg in func.arguments.iter() {
Self::collect_columns_impl(table_index, schema, arg, columns)?;
}
}
ScalarExpr::CastExpr(cast) => {
Self::collect_columns_impl(table_index, schema, cast.argument.as_ref(), columns)?;
Err(ErrorCode::Unimplemented("Column is not in the table"))
}
ScalarExpr::ConstantExpr(_) => {}
ScalarExpr::UDFCall(udf) => {
for arg in udf.arguments.iter() {
Self::collect_columns_impl(table_index, schema, arg, columns)?;
}
}
_ => {
// SubqueryExpr and AggregateFunction will not appear in Filter-Scan
return Err(ErrorCode::Unimplemented(format!(

fn visit_subquery(&mut self, subquery: &'a SubqueryExpr) -> Result<()> {
Err(ErrorCode::Unimplemented(format!(
"Prewhere don't support expr {:?}",
expr
)));
subquery
)))
}
}
Ok(())

let mut column_visitor = ColumnVisitor {
table_index,
schema: schema.clone(),
columns: ColumnSet::new(),
};
// WindowFunc, SubqueryExpr and AggregateFunction will not appear in Scan
// WindowFunc and AggFunc already check in binder:
// Where clause can't contain aggregate or window functions
column_visitor.visit(expr)?;
Ok(column_visitor.columns)
}

// analyze if the expression can be moved to prewhere
Expand All @@ -104,11 +110,7 @@ impl RulePushDownPrewhere {
schema: &TableSchemaRef,
expr: &ScalarExpr,
) -> Option<ColumnSet> {
let mut columns = ColumnSet::new();
// columns in subqueries are not considered
Self::collect_columns_impl(table_index, schema, expr, &mut columns).ok()?;

Some(columns)
Self::collect_columns_impl(table_index, schema, expr).ok()
}

pub fn prewhere_optimize(&self, s_expr: &SExpr) -> Result<SExpr> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ drop table if exists t_where_optimizer
statement ok
create table if not exists t_where_optimizer (a int, b int)

statement ok
DROP FUNCTION IF EXISTS isnotempty;

statement ok
CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p));

query T
explain select a from t_where_optimizer where a = 1
----
Expand All @@ -17,6 +23,20 @@ TableScan
├── push downs: [filters: [is_true(t_where_optimizer.a (#0) = 1)], limit: NONE]
└── estimated rows: 0.00

query T
explain select a from t_where_optimizer where isnotempty(a)
----
TableScan
├── table: default.default.t_where_optimizer
├── output columns: [a (#0)]
├── read rows: 0
├── read size: 0
├── partitions total: 0
├── partitions scanned: 0
├── push downs: [filters: [NOT NOT is_not_null(t_where_optimizer.a (#0))], limit: NONE]
└── estimated rows: 0.00


query T
explain select * from t_where_optimizer where a = b
----
Expand Down Expand Up @@ -103,3 +123,6 @@ TableScan

statement ok
drop table t_where_optimizer

statement ok
DROP FUNCTION IF EXISTS isnotempty;
Loading