From 212b382df36f5a9d7f352d7b3034deb3fa172f52 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Wed, 30 Oct 2024 15:35:25 +0800 Subject: [PATCH 1/2] feat: implement is_not_null selectivity based on null count in stats --- .../filter_selectivity_sample.rs | 9 +++-- .../planner/optimizer/property/selectivity.rs | 34 ++++++++++++++++++- src/query/sql/src/planner/plans/filter.rs | 2 +- src/query/sql/src/planner/plans/scan.rs | 6 +++- .../explain/selectivity/is_not_null.test | 26 ++++++++++++++ 5 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 tests/sqllogictests/suites/mode/standalone/explain/selectivity/is_not_null.test diff --git a/src/query/sql/src/planner/optimizer/dynamic_sample/filter_selectivity_sample.rs b/src/query/sql/src/planner/optimizer/dynamic_sample/filter_selectivity_sample.rs index 5243158260f06..5dd4d58bdd3e0 100644 --- a/src/query/sql/src/planner/optimizer/dynamic_sample/filter_selectivity_sample.rs +++ b/src/query/sql/src/planner/optimizer/dynamic_sample/filter_selectivity_sample.rs @@ -95,8 +95,13 @@ pub async fn filter_selectivity_sample( if let Some(number_scalar) = count.index(0) { // Compute and return selectivity let selectivity = number_scalar.to_f64().to_f64().unwrap() / sample_size; - let mut statistics = child_rel_expr.derive_cardinality()?.statistics.clone(); - let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new()); + let stat_info = child_rel_expr.derive_cardinality()?; + let mut statistics = stat_info.statistics.clone(); + let mut sb = SelectivityEstimator::new( + &mut statistics, + stat_info.cardinality, + HashSet::new(), + ); sb.update_other_statistic_by_selectivity(selectivity); let stat_info = Arc::new(StatInfo { cardinality: (selectivity * num_rows as f64).ceil(), diff --git a/src/query/sql/src/planner/optimizer/property/selectivity.rs b/src/query/sql/src/planner/optimizer/property/selectivity.rs index a4ec7dbc76d76..5986a0082c6cb 100644 --- a/src/query/sql/src/planner/optimizer/property/selectivity.rs +++ b/src/query/sql/src/planner/optimizer/property/selectivity.rs @@ -52,13 +52,19 @@ const ANY_CHAR_SEL: f64 = 0.9; // not 1, since it won't match end-of-string const FULL_WILDCARD_SEL: f64 = 2.0; pub struct SelectivityEstimator<'a> { + pub cardinality: f64, pub input_stat: &'a mut Statistics, pub updated_column_indexes: HashSet, } impl<'a> SelectivityEstimator<'a> { - pub fn new(input_stat: &'a mut Statistics, updated_column_indexes: HashSet) -> Self { + pub fn new( + input_stat: &'a mut Statistics, + cardinality: f64, + updated_column_indexes: HashSet, + ) -> Self { Self { + cardinality, input_stat, updated_column_indexes, } @@ -102,6 +108,9 @@ impl<'a> SelectivityEstimator<'a> { if func.func_name.eq("like") { return self.compute_like_selectivity(func); } + if func.func_name.eq("is_not_null") { + return self.compute_is_not_null_selectivity(&func.arguments[0]); + } if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) { return self.compute_selectivity_comparison_expr( op, @@ -159,6 +168,29 @@ impl<'a> SelectivityEstimator<'a> { } } + fn compute_is_not_null_selectivity(&mut self, expr: &ScalarExpr) -> Result { + match expr { + ScalarExpr::BoundColumnRef(column_ref) => { + let column_stat = if let Some(stat) = self + .input_stat + .column_stats + .get_mut(&column_ref.column.index) + { + stat + } else { + return Ok(DEFAULT_SELECTIVITY); + }; + if self.cardinality == 0.0 { + return Ok(0.0); + } + let selectivity = + (self.cardinality - column_stat.null_count as f64) / self.cardinality; + Ok(selectivity) + } + _ => Ok(DEFAULT_SELECTIVITY), + } + } + fn compute_selectivity_comparison_expr( &mut self, mut op: ComparisonOp, diff --git a/src/query/sql/src/planner/plans/filter.rs b/src/query/sql/src/planner/plans/filter.rs index b36904a049fb9..058f4c3959ecc 100644 --- a/src/query/sql/src/planner/plans/filter.rs +++ b/src/query/sql/src/planner/plans/filter.rs @@ -87,7 +87,7 @@ impl Operator for Filter { let (input_cardinality, mut statistics) = (stat_info.cardinality, stat_info.statistics.clone()); // Derive cardinality - let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new()); + let mut sb = SelectivityEstimator::new(&mut statistics, input_cardinality, HashSet::new()); let mut selectivity = MAX_SELECTIVITY; for pred in self.predicates.iter() { // Compute selectivity for each conjunction diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 2e63c6bb308e2..72124bf43945d 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -271,7 +271,11 @@ impl Operator for Scan { column_stats, }; // Derive cardinality - let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new()); + let mut sb = SelectivityEstimator::new( + &mut statistics, + precise_cardinality as f64, + HashSet::new(), + ); let mut selectivity = MAX_SELECTIVITY; for pred in prewhere.predicates.iter() { // Compute selectivity for each conjunction diff --git a/tests/sqllogictests/suites/mode/standalone/explain/selectivity/is_not_null.test b/tests/sqllogictests/suites/mode/standalone/explain/selectivity/is_not_null.test new file mode 100644 index 0000000000000..65d076eb19951 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/selectivity/is_not_null.test @@ -0,0 +1,26 @@ +statement ok +CREATE OR REPLACE TABLE twocolumn (x INT NULL, y INT NULL); + +statement ok +INSERT INTO twocolumn(x, y) VALUES (44,51), (NULL,52), (42,53), (45,45); + +query T +explain select * from twocolumn where x is not NULL; +---- +Filter +├── output columns: [twocolumn.x (#0), twocolumn.y (#1)] +├── filters: [is_not_null(twocolumn.x (#0))] +├── estimated rows: 3.00 +└── TableScan + ├── table: default.default.twocolumn + ├── output columns: [x (#0), y (#1)] + ├── read rows: 4 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_not_null(twocolumn.x (#0))], limit: NONE] + └── estimated rows: 4.00 + +statement ok +DROP TABLE twocolumn; From 5a1098c7679ba0646e20f82b1825dbac3cf5e965 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Wed, 30 Oct 2024 16:47:12 +0800 Subject: [PATCH 2/2] fix test --- .../explain/eliminate_outer_join.test | 30 +++++++++---------- .../standalone/explain/nullable_prune.test | 4 +-- .../explain_native/nullable_prune.test | 4 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test index 8394fb86f67d6..0e7b54503a43c 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test @@ -112,11 +112,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 10.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_not_null(t1.a (#1))] -│ ├── estimated rows: 2.00 +│ ├── estimated rows: 10.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -130,7 +130,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_not_null(t.a (#0))] - ├── estimated rows: 2.00 + ├── estimated rows: 10.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -151,11 +151,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 10.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_not_null(t1.a (#1))] -│ ├── estimated rows: 2.00 +│ ├── estimated rows: 10.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -169,7 +169,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_not_null(t.a (#0))] - ├── estimated rows: 2.00 + ├── estimated rows: 10.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -190,11 +190,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 10.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_not_null(t1.a (#1))] -│ ├── estimated rows: 2.00 +│ ├── estimated rows: 10.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -208,7 +208,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_not_null(t.a (#0))] - ├── estimated rows: 2.00 + ├── estimated rows: 10.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -229,11 +229,11 @@ HashJoin ├── build keys: [t.a (#0)] ├── probe keys: [t1.a (#1)] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 10.00 ├── Filter(Build) │ ├── output columns: [t.a (#0)] │ ├── filters: [is_not_null(t.a (#0))] -│ ├── estimated rows: 2.00 +│ ├── estimated rows: 10.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#0)] @@ -247,7 +247,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t1.a (#1)] ├── filters: [is_not_null(t1.a (#1))] - ├── estimated rows: 2.00 + ├── estimated rows: 10.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#1)] @@ -268,11 +268,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 10.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_not_null(t1.a (#1))] -│ ├── estimated rows: 2.00 +│ ├── estimated rows: 10.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -286,7 +286,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_not_null(t.a (#0))] - ├── estimated rows: 2.00 + ├── estimated rows: 10.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/nullable_prune.test b/tests/sqllogictests/suites/mode/standalone/explain/nullable_prune.test index 6d8c2226d8d26..7fbfed14105fb 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/nullable_prune.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/nullable_prune.test @@ -33,7 +33,7 @@ explain select * from t_nullable_prune where a is not null Filter ├── output columns: [t_nullable_prune.a (#0)] ├── filters: [is_not_null(t_nullable_prune.a (#0))] -├── estimated rows: 1.20 +├── estimated rows: 3.00 └── TableScan ├── table: default.default.t_nullable_prune ├── output columns: [a (#0)] @@ -51,7 +51,7 @@ explain select * from t_nullable_prune where a is null Filter ├── output columns: [t_nullable_prune.a (#0)] ├── filters: [NOT is_not_null(t_nullable_prune.a (#0))] -├── estimated rows: 4.80 +├── estimated rows: 3.00 └── TableScan ├── table: default.default.t_nullable_prune ├── output columns: [a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/nullable_prune.test b/tests/sqllogictests/suites/mode/standalone/explain_native/nullable_prune.test index d5a1b5f7a0500..e1ffe596de1b4 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/nullable_prune.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/nullable_prune.test @@ -39,7 +39,7 @@ TableScan ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [is_not_null(t_nullable_prune.a (#0))], limit: NONE] -└── estimated rows: 1.20 +└── estimated rows: 3.00 query T explain select * from t_nullable_prune where a is null @@ -53,7 +53,7 @@ TableScan ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [NOT is_not_null(t_nullable_prune.a (#0))], limit: NONE] -└── estimated rows: 4.80 +└── estimated rows: 3.00 statement ok DROP TABLE default.default.t_nullable_prune