Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement is_not_null selectivity based on null count in stats #16730

Merged
merged 2 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,13 @@ pub async fn filter_selectivity_sample(
if let Some(number_scalar) = count.index(0) {
// Compute and return selectivity
let selectivity = number_scalar.to_f64().to_f64().unwrap() / sample_size;
let mut statistics = child_rel_expr.derive_cardinality()?.statistics.clone();
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
let stat_info = child_rel_expr.derive_cardinality()?;
let mut statistics = stat_info.statistics.clone();
let mut sb = SelectivityEstimator::new(
&mut statistics,
stat_info.cardinality,
HashSet::new(),
);
sb.update_other_statistic_by_selectivity(selectivity);
let stat_info = Arc::new(StatInfo {
cardinality: (selectivity * num_rows as f64).ceil(),
Expand Down
34 changes: 33 additions & 1 deletion src/query/sql/src/planner/optimizer/property/selectivity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,19 @@ const ANY_CHAR_SEL: f64 = 0.9; // not 1, since it won't match end-of-string
const FULL_WILDCARD_SEL: f64 = 2.0;

/// Estimates the selectivity of filter predicates from column statistics.
///
/// Holds a mutable borrow of the input `Statistics` so that estimates for
/// later predicates can account for updates applied by earlier ones
/// (see `update_other_statistic_by_selectivity`).
pub struct SelectivityEstimator<'a> {
/// Input cardinality (row count) of the relation the predicates filter;
/// used e.g. to turn a column's null count into an `is_not_null` fraction.
pub cardinality: f64,
/// Column-level statistics of the input relation; mutated in place as
/// selectivities are applied.
pub input_stat: &'a mut Statistics,
/// Indexes of columns whose statistics have already been updated by a
/// previous predicate — presumably to avoid double-applying updates;
/// TODO(review): confirm against update_other_statistic_by_selectivity.
pub updated_column_indexes: HashSet<IndexType>,
}

impl<'a> SelectivityEstimator<'a> {
pub fn new(input_stat: &'a mut Statistics, updated_column_indexes: HashSet<IndexType>) -> Self {
pub fn new(
input_stat: &'a mut Statistics,
cardinality: f64,
updated_column_indexes: HashSet<IndexType>,
) -> Self {
Self {
cardinality,
input_stat,
updated_column_indexes,
}
Expand Down Expand Up @@ -102,6 +108,9 @@ impl<'a> SelectivityEstimator<'a> {
if func.func_name.eq("like") {
return self.compute_like_selectivity(func);
}
if func.func_name.eq("is_not_null") {
return self.compute_is_not_null_selectivity(&func.arguments[0]);
}
if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) {
return self.compute_selectivity_comparison_expr(
op,
Expand Down Expand Up @@ -159,6 +168,29 @@ impl<'a> SelectivityEstimator<'a> {
}
}

/// Estimate the selectivity of an `is_not_null(column)` predicate as the
/// fraction of non-null rows: `(cardinality - null_count) / cardinality`.
///
/// Falls back to `DEFAULT_SELECTIVITY` when the argument is not a plain
/// column reference or when no statistics exist for the column.
fn compute_is_not_null_selectivity(&mut self, expr: &ScalarExpr) -> Result<f64> {
    match expr {
        ScalarExpr::BoundColumnRef(column_ref) => {
            // An empty input produces an empty output regardless of stats;
            // also guards the division below against 0/0.
            if self.cardinality == 0.0 {
                return Ok(0.0);
            }
            // The column statistic is only read here, so a shared borrow
            // (`get`) is sufficient — no need for `get_mut`.
            let column_stat = match self
                .input_stat
                .column_stats
                .get(&column_ref.column.index)
            {
                Some(stat) => stat,
                None => return Ok(DEFAULT_SELECTIVITY),
            };
            let selectivity =
                (self.cardinality - column_stat.null_count as f64) / self.cardinality;
            // Clamp: stale or inconsistent statistics may report a null
            // count larger than the derived cardinality, which would
            // otherwise yield a negative selectivity and corrupt downstream
            // cardinality estimates.
            Ok(selectivity.clamp(0.0, 1.0))
        }
        _ => Ok(DEFAULT_SELECTIVITY),
    }
}

fn compute_selectivity_comparison_expr(
&mut self,
mut op: ComparisonOp,
Expand Down
2 changes: 1 addition & 1 deletion src/query/sql/src/planner/plans/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl Operator for Filter {
let (input_cardinality, mut statistics) =
(stat_info.cardinality, stat_info.statistics.clone());
// Derive cardinality
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
let mut sb = SelectivityEstimator::new(&mut statistics, input_cardinality, HashSet::new());
let mut selectivity = MAX_SELECTIVITY;
for pred in self.predicates.iter() {
// Compute selectivity for each conjunction
Expand Down
6 changes: 5 additions & 1 deletion src/query/sql/src/planner/plans/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,11 @@ impl Operator for Scan {
column_stats,
};
// Derive cardinality
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
let mut sb = SelectivityEstimator::new(
&mut statistics,
precise_cardinality as f64,
HashSet::new(),
);
let mut selectivity = MAX_SELECTIVITY;
for pred in prewhere.predicates.iter() {
// Compute selectivity for each conjunction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ HashJoin
├── build keys: [t1.a (#1)]
├── probe keys: [t.a (#0)]
├── filters: []
├── estimated rows: 2.00
├── estimated rows: 10.00
├── Filter(Build)
│ ├── output columns: [t1.a (#1)]
│ ├── filters: [is_not_null(t1.a (#1))]
│ ├── estimated rows: 2.00
│ ├── estimated rows: 10.00
│ └── TableScan
│ ├── table: default.eliminate_outer_join.t
│ ├── output columns: [a (#1)]
Expand All @@ -130,7 +130,7 @@ HashJoin
└── Filter(Probe)
├── output columns: [t.a (#0)]
├── filters: [is_not_null(t.a (#0))]
├── estimated rows: 2.00
├── estimated rows: 10.00
└── TableScan
├── table: default.eliminate_outer_join.t
├── output columns: [a (#0)]
Expand All @@ -151,11 +151,11 @@ HashJoin
├── build keys: [t1.a (#1)]
├── probe keys: [t.a (#0)]
├── filters: []
├── estimated rows: 2.00
├── estimated rows: 10.00
├── Filter(Build)
│ ├── output columns: [t1.a (#1)]
│ ├── filters: [is_not_null(t1.a (#1))]
│ ├── estimated rows: 2.00
│ ├── estimated rows: 10.00
│ └── TableScan
│ ├── table: default.eliminate_outer_join.t
│ ├── output columns: [a (#1)]
Expand All @@ -169,7 +169,7 @@ HashJoin
└── Filter(Probe)
├── output columns: [t.a (#0)]
├── filters: [is_not_null(t.a (#0))]
├── estimated rows: 2.00
├── estimated rows: 10.00
└── TableScan
├── table: default.eliminate_outer_join.t
├── output columns: [a (#0)]
Expand All @@ -190,11 +190,11 @@ HashJoin
├── build keys: [t1.a (#1)]
├── probe keys: [t.a (#0)]
├── filters: []
├── estimated rows: 2.00
├── estimated rows: 10.00
├── Filter(Build)
│ ├── output columns: [t1.a (#1)]
│ ├── filters: [is_not_null(t1.a (#1))]
│ ├── estimated rows: 2.00
│ ├── estimated rows: 10.00
│ └── TableScan
│ ├── table: default.eliminate_outer_join.t
│ ├── output columns: [a (#1)]
Expand All @@ -208,7 +208,7 @@ HashJoin
└── Filter(Probe)
├── output columns: [t.a (#0)]
├── filters: [is_not_null(t.a (#0))]
├── estimated rows: 2.00
├── estimated rows: 10.00
└── TableScan
├── table: default.eliminate_outer_join.t
├── output columns: [a (#0)]
Expand All @@ -229,11 +229,11 @@ HashJoin
├── build keys: [t.a (#0)]
├── probe keys: [t1.a (#1)]
├── filters: []
├── estimated rows: 2.00
├── estimated rows: 10.00
├── Filter(Build)
│ ├── output columns: [t.a (#0)]
│ ├── filters: [is_not_null(t.a (#0))]
│ ├── estimated rows: 2.00
│ ├── estimated rows: 10.00
│ └── TableScan
│ ├── table: default.eliminate_outer_join.t
│ ├── output columns: [a (#0)]
Expand All @@ -247,7 +247,7 @@ HashJoin
└── Filter(Probe)
├── output columns: [t1.a (#1)]
├── filters: [is_not_null(t1.a (#1))]
├── estimated rows: 2.00
├── estimated rows: 10.00
└── TableScan
├── table: default.eliminate_outer_join.t
├── output columns: [a (#1)]
Expand All @@ -268,11 +268,11 @@ HashJoin
├── build keys: [t1.a (#1)]
├── probe keys: [t.a (#0)]
├── filters: []
├── estimated rows: 2.00
├── estimated rows: 10.00
├── Filter(Build)
│ ├── output columns: [t1.a (#1)]
│ ├── filters: [is_not_null(t1.a (#1))]
│ ├── estimated rows: 2.00
│ ├── estimated rows: 10.00
│ └── TableScan
│ ├── table: default.eliminate_outer_join.t
│ ├── output columns: [a (#1)]
Expand All @@ -286,7 +286,7 @@ HashJoin
└── Filter(Probe)
├── output columns: [t.a (#0)]
├── filters: [is_not_null(t.a (#0))]
├── estimated rows: 2.00
├── estimated rows: 10.00
└── TableScan
├── table: default.eliminate_outer_join.t
├── output columns: [a (#0)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ explain select * from t_nullable_prune where a is not null
Filter
├── output columns: [t_nullable_prune.a (#0)]
├── filters: [is_not_null(t_nullable_prune.a (#0))]
├── estimated rows: 1.20
├── estimated rows: 3.00
└── TableScan
├── table: default.default.t_nullable_prune
├── output columns: [a (#0)]
Expand All @@ -51,7 +51,7 @@ explain select * from t_nullable_prune where a is null
Filter
├── output columns: [t_nullable_prune.a (#0)]
├── filters: [NOT is_not_null(t_nullable_prune.a (#0))]
├── estimated rows: 4.80
├── estimated rows: 3.00
└── TableScan
├── table: default.default.t_nullable_prune
├── output columns: [a (#0)]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
statement ok
CREATE OR REPLACE TABLE twocolumn (x INT NULL, y INT NULL);

statement ok
INSERT INTO twocolumn(x, y) VALUES (44,51), (NULL,52), (42,53), (45,45);

query T
explain select * from twocolumn where x is not NULL;
----
Filter
├── output columns: [twocolumn.x (#0), twocolumn.y (#1)]
├── filters: [is_not_null(twocolumn.x (#0))]
├── estimated rows: 3.00
└── TableScan
├── table: default.default.twocolumn
├── output columns: [x (#0), y (#1)]
├── read rows: 4
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_not_null(twocolumn.x (#0))], limit: NONE]
└── estimated rows: 4.00

statement ok
DROP TABLE twocolumn;
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ TableScan
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 2 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_not_null(t_nullable_prune.a (#0))], limit: NONE]
└── estimated rows: 1.20
└── estimated rows: 3.00

query T
explain select * from t_nullable_prune where a is null
Expand All @@ -53,7 +53,7 @@ TableScan
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 2 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [NOT is_not_null(t_nullable_prune.a (#0))], limit: NONE]
└── estimated rows: 4.80
└── estimated rows: 3.00

statement ok
DROP TABLE default.default.t_nullable_prune
Loading