From 8f96f185420d932a7a57c53c5335c05e78c8b6a0 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Thu, 19 Sep 2024 15:16:05 +0800 Subject: [PATCH 1/3] feat: add optimizer rule to eliminate union --- .../sql/src/planner/optimizer/rule/factory.rs | 2 + .../src/planner/optimizer/rule/rewrite/mod.rs | 2 + .../rule/rewrite/rule_eliminate_union.rs | 114 ++++++++++++++ .../sql/src/planner/optimizer/rule/rule.rs | 3 + .../suites/mode/standalone/explain/union.test | 149 +++++++++++------- 5 files changed, 214 insertions(+), 56 deletions(-) create mode 100644 src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs diff --git a/src/query/sql/src/planner/optimizer/rule/factory.rs b/src/query/sql/src/planner/optimizer/rule/factory.rs index e3b322b39e179..378cc3b89f730 100644 --- a/src/query/sql/src/planner/optimizer/rule/factory.rs +++ b/src/query/sql/src/planner/optimizer/rule/factory.rs @@ -16,6 +16,7 @@ use databend_common_exception::Result; use super::rewrite::RuleCommuteJoin; use super::rewrite::RuleEliminateEvalScalar; +use super::rewrite::RuleEliminateUnion; use super::rewrite::RuleFoldCountAggregate; use super::rewrite::RuleNormalizeScalarFilter; use super::rewrite::RulePushDownFilterAggregate; @@ -58,6 +59,7 @@ pub const MAX_PUSH_DOWN_LIMIT: usize = 10000; impl RuleFactory { pub fn create_rule(id: RuleID, metadata: MetadataRef) -> Result { match id { + RuleID::EliminateUnion => Ok(Box::new(RuleEliminateUnion::new(metadata))), RuleID::EliminateEvalScalar => Ok(Box::new(RuleEliminateEvalScalar::new(metadata))), RuleID::PushDownFilterUnion => Ok(Box::new(RulePushDownFilterUnion::new())), RuleID::PushDownFilterEvalScalar => Ok(Box::new(RulePushDownFilterEvalScalar::new())), diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs index 1e55479e3a932..0feaeea4c6fbb 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs @@ -18,6 +18,7 @@ mod rule_commute_join; mod rule_eliminate_eval_scalar; mod rule_eliminate_filter; mod rule_eliminate_sort; +mod rule_eliminate_union; mod rule_fold_count_aggregate; mod rule_merge_eval_scalar; mod rule_merge_filter; @@ -49,6 +50,7 @@ pub use rule_commute_join::RuleCommuteJoin; pub use rule_eliminate_eval_scalar::RuleEliminateEvalScalar; pub use rule_eliminate_filter::RuleEliminateFilter; pub use rule_eliminate_sort::RuleEliminateSort; +pub use rule_eliminate_union::RuleEliminateUnion; pub use rule_fold_count_aggregate::RuleFoldCountAggregate; pub use rule_merge_eval_scalar::RuleMergeEvalScalar; pub use rule_merge_filter::RuleMergeFilter; diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs new file mode 100644 index 0000000000000..4d428490edd02 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs @@ -0,0 +1,114 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_exception::Result; +use databend_common_expression::DataField; +use databend_common_expression::DataSchemaRefExt; + +use crate::optimizer::extract::Matcher; +use crate::optimizer::rule::Rule; +use crate::optimizer::rule::RuleID; +use crate::optimizer::rule::TransformResult; +use crate::optimizer::RelExpr; +use crate::optimizer::SExpr; +use crate::plans::ConstantTableScan; +use crate::plans::Operator; +use crate::plans::RelOp; +use crate::plans::RelOperator; +use crate::plans::UnionAll; +use crate::MetadataRef; + +pub struct RuleEliminateUnion { + id: RuleID, + matchers: Vec, + metadata: MetadataRef, +} + +impl RuleEliminateUnion { + pub fn new(metadata: MetadataRef) -> Self { + Self { + id: RuleID::EliminateUnion, + matchers: vec![Matcher::MatchOp { + op_type: RelOp::UnionAll, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + metadata, + } + } + + fn is_empty_scan(s_expr: &SExpr) -> Result { + let child_num = s_expr.children.len(); + if child_num > 1 { + return Ok(false); + } + if child_num == 0 { + Ok(matches!( + s_expr.plan(), + RelOperator::DummyTableScan(_) + | RelOperator::ConstantTableScan(ConstantTableScan { num_rows: 0, .. }) + )) + } else { + Self::is_empty_scan(s_expr.child(0)?) + } + } +} + +impl Rule for RuleEliminateUnion { + fn id(&self) -> RuleID { + self.id + } + + fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { + let union: UnionAll = s_expr.plan().clone().try_into()?; + let left_child = s_expr.child(0)?; + let right_child = s_expr.child(1)?; + + if Self::is_empty_scan(left_child)? && Self::is_empty_scan(right_child)? { + // If both children are empty, replace with EmptyResultScan + let union_output_columns = union + .derive_relational_prop(&RelExpr::with_s_expr(s_expr))? + .output_columns + .clone(); + let metadata = self.metadata.read(); + let mut fields = Vec::with_capacity(union_output_columns.len()); + for col in union_output_columns.iter() { + fields.push(DataField::new( + &col.to_string(), + metadata.column(*col).data_type(), + )); + } + + let empty_scan = ConstantTableScan::new_empty_scan( + DataSchemaRefExt::create(fields), + union_output_columns, + ); + let result = SExpr::create_leaf(Arc::new(RelOperator::ConstantTableScan(empty_scan))); + state.add_result(result); + } else if Self::is_empty_scan(left_child)? { + // If left child is empty, use right child + state.add_result(right_child.clone()); + } else if Self::is_empty_scan(right_child)? { + // If right child is empty, use left child + state.add_result(left_child.clone()); + } + + Ok(()) + } + + fn matchers(&self) -> &[Matcher] { + &self.matchers + } +} diff --git a/src/query/sql/src/planner/optimizer/rule/rule.rs b/src/query/sql/src/planner/optimizer/rule/rule.rs index fb19eee25c4a8..95355c9f40380 100644 --- a/src/query/sql/src/planner/optimizer/rule/rule.rs +++ b/src/query/sql/src/planner/optimizer/rule/rule.rs @@ -27,6 +27,7 @@ use crate::optimizer::SExpr; pub static DEFAULT_REWRITE_RULES: LazyLock> = LazyLock::new(|| { vec![ RuleID::EliminateSort, + RuleID::EliminateUnion, RuleID::MergeEvalScalar, // Filter RuleID::EliminateFilter, @@ -78,6 +79,7 @@ pub trait Rule { #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, FromPrimitive, ToPrimitive)] pub enum RuleID { // Rewrite rules + EliminateUnion, NormalizeScalarFilter, PushDownFilterAggregate, PushDownFilterEvalScalar, @@ -152,6 +154,7 @@ impl Display for RuleID { RuleID::EagerAggregation => write!(f, "EagerAggregation"), RuleID::TryApplyAggIndex => write!(f, "TryApplyAggIndex"), RuleID::SemiToInnerJoin => write!(f, "SemiToInnerJoin"), + RuleID::EliminateUnion => write!(f, "EliminateUnion"), } } } diff --git a/tests/sqllogictests/suites/mode/standalone/explain/union.test b/tests/sqllogictests/suites/mode/standalone/explain/union.test index bcc85eab5a6b7..ff948bf6dd73f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/union.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/union.test @@ -28,34 +28,26 @@ explain select * from v where a > b UnionAll ├── output columns: [t1.a (#0), t1.b (#1)] ├── estimated rows: 0.80 -├── Filter -│ ├── output columns: [t1.a (#0), t1.b (#1)] -│ ├── filters: [is_true(t1.a (#0) > t1.b (#1))] -│ ├── estimated rows: 0.40 -│ └── TableScan -│ ├── table: default.default.t1 -│ ├── output columns: [a (#0), b (#1)] -│ ├── read rows: 0 -│ ├── read size: 0 -│ ├── partitions total: 1 -│ ├── partitions scanned: 0 -│ ├── pruning stats: [segments: ] -│ ├── push downs: [filters: [is_true(t1.a (#0) > t1.b (#1))], limit: NONE] -│ └── estimated rows: 2.00 -└── Filter - ├── output columns: [t2.a (#2), t2.b (#3)] - ├── filters: [is_true(t2.a (#2) > t2.b (#3))] - ├── estimated rows: 0.40 - └── TableScan - ├── table: default.default.t2 - ├── output columns: [a (#2), b (#3)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 1 - ├── partitions scanned: 0 - ├── pruning stats: [segments: ] - ├── push downs: [filters: [is_true(t2.a (#2) > t2.b (#3))], limit: NONE] - └── estimated rows: 2.00 +├── TableScan +│ ├── table: default.default.t1 +│ ├── output columns: [a (#0), b (#1)] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: ] +│ ├── push downs: [filters: [is_true(t1.a (#0) > t1.b (#1))], limit: NONE] +│ └── estimated rows: 0.40 +└── TableScan + ├── table: default.default.t2 + ├── output columns: [a (#2), b (#3)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: ] + ├── push downs: [filters: [is_true(t2.a (#2) > t2.b (#3))], limit: NONE] + └── estimated rows: 0.40 query T explain select * from v where a > 1 @@ -63,34 +55,26 @@ explain select * from v where a > 1 UnionAll ├── output columns: [t1.a (#0), t1.b (#1)] ├── estimated rows: 2.00 -├── Filter -│ ├── output columns: [t1.a (#0), t1.b (#1)] -│ ├── filters: [is_true(t1.a (#0) > 1)] -│ ├── estimated rows: 1.00 -│ └── TableScan -│ ├── table: default.default.t1 -│ ├── output columns: [a (#0), b (#1)] -│ ├── read rows: 2 -│ ├── read size: < 1 KiB -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true(t1.a (#0) > 1)], limit: NONE] -│ └── estimated rows: 2.00 -└── Filter - ├── output columns: [t2.a (#2), t2.b (#3)] - ├── filters: [is_true(t2.a (#2) > 1)] - ├── estimated rows: 1.00 - └── TableScan - ├── table: default.default.t2 - ├── output columns: [a (#2), b (#3)] - ├── read rows: 2 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(t2.a (#2) > 1)], limit: NONE] - └── estimated rows: 2.00 +├── TableScan +│ ├── table: default.default.t1 +│ ├── output columns: [a (#0), b (#1)] +│ ├── read rows: 2 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t1.a (#0) > 1)], limit: NONE] +│ └── estimated rows: 1.00 +└── TableScan + ├── table: default.default.t2 + ├── output columns: [a (#2), b (#3)] + ├── read rows: 2 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t2.a (#2) > 1)], limit: NONE] + └── estimated rows: 1.00 query T explain select * from v limit 3 @@ -224,5 +208,58 @@ drop table t1 statement ok drop table t2 +statement ok +create table t1 as select number as a from numbers(10000); + +statement ok +create table t2 as select number as b from numbers(10000); + +query T +explain select * from t1 where t1.a < 0 union all select a from t2 join t1 on t1.a = t2.b where a <0; +---- +EmptyResultScan + + +query T +---- +explain select * from t1 where t1.a < 0 union all select * from t2 ; +---- +TableScan +├── table: default.default.t2 +├── output columns: [] +├── read rows: 10000 +├── read size: 0 +├── partitions total: 1 +├── partitions scanned: 1 +├── pruning stats: [segments: , blocks: ] +├── push downs: [filters: [], limit: NONE] +└── estimated rows: 10000.00 + + +query T +explain select * from t1 union all select * from t2 where t2.b < 0; +---- +TableScan +├── table: default.default.t1 +├── output columns: [a (#0)] +├── read rows: 10000 +├── read size: 39.15 KiB +├── partitions total: 1 +├── partitions scanned: 1 +├── pruning stats: [segments: , blocks: ] +├── push downs: [filters: [], limit: NONE] +└── estimated rows: 10000.00 + +query T +explain select * from t1 where t1.a < 0 union all select * from t2 where t2.b < 0; +---- +EmptyResultScan + +statement ok +drop table t1; + +statement ok +drop table t2; + statement ok drop view v From fc88e5331d465be56e71fb39b256e9be55d62969 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Thu, 19 Sep 2024 22:20:15 +0800 Subject: [PATCH 2/3] fix test --- .../suites/mode/standalone/explain/union.test | 98 +++++++++++-------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/union.test b/tests/sqllogictests/suites/mode/standalone/explain/union.test index ff948bf6dd73f..df5e03f373932 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/union.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/union.test @@ -28,26 +28,34 @@ explain select * from v where a > b UnionAll ├── output columns: [t1.a (#0), t1.b (#1)] ├── estimated rows: 0.80 -├── TableScan -│ ├── table: default.default.t1 -│ ├── output columns: [a (#0), b (#1)] -│ ├── read rows: 0 -│ ├── read size: 0 -│ ├── partitions total: 1 -│ ├── partitions scanned: 0 -│ ├── pruning stats: [segments: ] -│ ├── push downs: [filters: [is_true(t1.a (#0) > t1.b (#1))], limit: NONE] -│ └── estimated rows: 0.40 -└── TableScan - ├── table: default.default.t2 - ├── output columns: [a (#2), b (#3)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 1 - ├── partitions scanned: 0 - ├── pruning stats: [segments: ] - ├── push downs: [filters: [is_true(t2.a (#2) > t2.b (#3))], limit: NONE] - └── estimated rows: 0.40 +├── Filter +│ ├── output columns: [t1.a (#0), t1.b (#1)] +│ ├── filters: [is_true(t1.a (#0) > t1.b (#1))] +│ ├── estimated rows: 0.40 +│ └── TableScan +│ ├── table: default.default.t1 +│ ├── output columns: [a (#0), b (#1)] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: ] +│ ├── push downs: [filters: [is_true(t1.a (#0) > t1.b (#1))], limit: NONE] +│ └── estimated rows: 2.00 +└── Filter + ├── output columns: [t2.a (#2), t2.b (#3)] + ├── filters: [is_true(t2.a (#2) > t2.b (#3))] + ├── estimated rows: 0.40 + └── TableScan + ├── table: default.default.t2 + ├── output columns: [a (#2), b (#3)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: ] + ├── push downs: [filters: [is_true(t2.a (#2) > t2.b (#3))], limit: NONE] + └── estimated rows: 2.00 query T explain select * from v where a > 1 @@ -55,26 +63,34 @@ explain select * from v where a > 1 UnionAll ├── output columns: [t1.a (#0), t1.b (#1)] ├── estimated rows: 2.00 -├── TableScan -│ ├── table: default.default.t1 -│ ├── output columns: [a (#0), b (#1)] -│ ├── read rows: 2 -│ ├── read size: < 1 KiB -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true(t1.a (#0) > 1)], limit: NONE] -│ └── estimated rows: 1.00 -└── TableScan - ├── table: default.default.t2 - ├── output columns: [a (#2), b (#3)] - ├── read rows: 2 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(t2.a (#2) > 1)], limit: NONE] - └── estimated rows: 1.00 +├── Filter +│ ├── output columns: [t1.a (#0), t1.b (#1)] +│ ├── filters: [is_true(t1.a (#0) > 1)] +│ ├── estimated rows: 1.00 +│ └── TableScan +│ ├── table: default.default.t1 +│ ├── output columns: [a (#0), b (#1)] +│ ├── read rows: 2 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t1.a (#0) > 1)], limit: NONE] +│ └── estimated rows: 2.00 +└── Filter + ├── output columns: [t2.a (#2), t2.b (#3)] + ├── filters: [is_true(t2.a (#2) > 1)] + ├── estimated rows: 1.00 + └── TableScan + ├── table: default.default.t2 + ├── output columns: [a (#2), b (#3)] + ├── read rows: 2 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t2.a (#2) > 1)], limit: NONE] + └── estimated rows: 2.00 query T explain select * from v limit 3 @@ -243,7 +259,7 @@ TableScan ├── table: default.default.t1 ├── output columns: [a (#0)] ├── read rows: 10000 -├── read size: 39.15 KiB +├── read size: 10.59 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] From 9d8aeb503bbf3dbea0a9e0b10f8299f607daa8ac Mon Sep 17 00:00:00 2001 From: xudong963 Date: Fri, 20 Sep 2024 00:39:38 +0800 Subject: [PATCH 3/3] fix --- .../src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs index 4d428490edd02..721b342cae813 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_union.rs @@ -57,8 +57,7 @@ impl RuleEliminateUnion { if child_num == 0 { Ok(matches!( s_expr.plan(), - RelOperator::DummyTableScan(_) - | RelOperator::ConstantTableScan(ConstantTableScan { num_rows: 0, .. }) + RelOperator::ConstantTableScan(ConstantTableScan { num_rows: 0, .. }) )) } else { Self::is_empty_scan(s_expr.child(0)?)