Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(frontend): order by arbitrary expr #2329

Merged
merged 3 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions e2e_test/v2/batch/order_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,30 @@ select * from t order by v1 desc
2 3 3
1 4 2

query III
select * from t order by 1;
----
1 4 2
2 3 3
3 4 4
4 3 5

query III
select v2 from t order by v1;
----
4
3
4
3

query III
select * from t order by v1 + v2;
----
1 4 2
2 3 3
3 4 4
4 3 5

query III
select * from t order by v1 desc limit 1
----
Expand Down
1 change: 1 addition & 0 deletions src/frontend/src/binder/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ impl Binder {
order: vec![],
limit: None,
offset: None,
extra_order_exprs: vec![],
},
vec![],
)
Expand Down
40 changes: 30 additions & 10 deletions src/frontend/src/binder/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ use std::collections::HashMap;
use risingwave_common::catalog::Schema;
use risingwave_common::error::{ErrorCode, Result};
use risingwave_common::types::DataType;
use risingwave_sqlparser::ast::{Expr, OrderByExpr, Query};
use risingwave_sqlparser::ast::{Expr, OrderByExpr, Query, Value};

use crate::binder::{Binder, BoundSetExpr};
use crate::expr::ExprImpl;
use crate::optimizer::property::{Direction, FieldOrder};

/// A validated sql query, including order and union.
Expand All @@ -30,6 +31,7 @@ pub struct BoundQuery {
pub order: Vec<FieldOrder>,
pub limit: Option<usize>,
pub offset: Option<usize>,
pub extra_order_exprs: Vec<ExprImpl>,
}

impl BoundQuery {
Expand Down Expand Up @@ -76,39 +78,57 @@ impl Binder {
}),
BoundSetExpr::Values(_) => {}
};
let mut extra_order_exprs = vec![];
let visible_output_num = body.schema().len();
let order = query
.order_by
.into_iter()
.map(|order_by_expr| self.bind_order_by_expr(order_by_expr, &name_to_index))
.map(|order_by_expr| {
self.bind_order_by_expr(
order_by_expr,
&name_to_index,
&mut extra_order_exprs,
visible_output_num,
)
})
.collect::<Result<_>>()?;
Ok(BoundQuery {
body,
order,
limit,
offset,
extra_order_exprs,
})
}

fn bind_order_by_expr(
&mut self,
order_by_expr: OrderByExpr,
name_to_index: &HashMap<String, usize>,
extra_order_exprs: &mut Vec<ExprImpl>,
visible_output_num: usize,
) -> Result<FieldOrder> {
let direct = match order_by_expr.asc {
None | Some(true) => Direction::Asc,
Some(false) => Direction::Desc,
};
let name = match order_by_expr.expr {
Expr::Identifier(name) => name.value,
let index = match order_by_expr.expr {
Expr::Identifier(name) if let Some(index) = name_to_index.get(&name.value) => *index,
Expr::Value(Value::Number(number, _)) => match number.parse::<usize>() {
Ok(index) if 1 <= index && index <= visible_output_num => index - 1,
_ => {
return Err(ErrorCode::InvalidInputSyntax(format!(
"Invalid value in ORDER BY: {}",
number
))
.into())
}
},
expr => {
return Err(
ErrorCode::NotImplemented(format!("ORDER BY {:?}", expr), 1635.into()).into(),
)
extra_order_exprs.push(self.bind_expr(expr)?);
visible_output_num + extra_order_exprs.len() - 1
}
};
let index = *name_to_index
.get(&name)
.ok_or_else(|| ErrorCode::ItemNotFound(format!("output column \"{}\"", name)))?;
Ok(FieldOrder { index, direct })
}
}
1 change: 1 addition & 0 deletions src/frontend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#![feature(let_else)]
#![feature(trait_alias)]
#![feature(drain_filter)]
#![feature(if_let_guard)]
#[macro_use]
pub mod catalog;
pub mod binder;
Expand Down
31 changes: 28 additions & 3 deletions src/frontend/src/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use risingwave_common::catalog::Schema;
use risingwave_common::error::Result;

use self::heuristic::{ApplyOrder, HeuristicOptimizer};
use self::plan_node::{Convention, LogicalProject, StreamMaterialize};
use self::plan_node::{BatchProject, Convention, LogicalProject, StreamMaterialize};
use self::rule::*;
use crate::catalog::TableId;
use crate::expr::InputRef;
Expand Down Expand Up @@ -141,7 +141,14 @@ impl PlanRoot {
};

// Prune Columns
plan = plan.prune_col(&self.out_fields);
//
// Currently, the expressions in ORDER BY will be merged into the expressions in SELECT and
// they shouldn't be a part of output columns, so we use `out_fields` to control the
// visibility of these expressions. To avoid these expressions being pruned, we can't
// use `self.out_fields` as `required_cols` here.
let mut required_cols = FixedBitSet::with_capacity(self.plan.schema().len());
required_cols.insert_range(..);
plan = plan.prune_col(&required_cols);

plan = {
let rules = vec![
Expand All @@ -167,7 +174,25 @@ impl PlanRoot {
plan = plan.to_batch_with_order_required(&self.required_order);

// Convert to distributed plan
plan.to_distributed_with_required(&self.required_order, &self.required_dist)
plan = plan.to_distributed_with_required(&self.required_order, &self.required_dist);

// Add Project if the any position of `self.out_fields` is set to zero.
if self.out_fields.count_ones(..) != self.out_fields.len() {
let (exprs, expr_aliases) = self
.out_fields
.ones()
.zip_eq(self.schema.fields.clone())
.map(|(index, field)| {
(
InputRef::new(index, field.data_type).into(),
Some(field.name),
)
})
.unzip();
plan = BatchProject::new(LogicalProject::new(plan, exprs, expr_aliases)).into();
}

plan
}

/// Generate create index or create materialize view plan.
Expand Down
5 changes: 3 additions & 2 deletions src/frontend/src/planner/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ pub const LIMIT_ALL_COUNT: usize = usize::MAX / 2;
impl Planner {
/// Plan a [`BoundQuery`]. Need to bind before planning.
pub fn plan_query(&mut self, query: BoundQuery) -> Result<PlanRoot> {
let mut plan = self.plan_set_expr(query.body)?;
let extra_order_exprs_len = query.extra_order_exprs.len();
let mut plan = self.plan_set_expr(query.body, query.extra_order_exprs)?;
let order = Order {
field_order: query.order,
};
Expand All @@ -43,7 +44,7 @@ impl Planner {
}
let dist = Distribution::Single;
let mut out_fields = FixedBitSet::with_capacity(plan.schema().len());
out_fields.insert_range(..);
out_fields.insert_range(..plan.schema().len() - extra_order_exprs_len);
let root = PlanRoot::new(plan, dist, order, out_fields);
Ok(root)
}
Expand Down
13 changes: 12 additions & 1 deletion src/frontend/src/planner/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,22 @@ impl Planner {
mut select_items,
group_by,
mut having,
aliases,
mut aliases,
distinct,
..
}: BoundSelect,
extra_order_exprs: Vec<ExprImpl>,
) -> Result<PlanRef> {
// Append expressions in ORDER BY.
if distinct && !extra_order_exprs.is_empty() {
return Err(ErrorCode::InvalidInputSyntax(
"for SELECT DISTINCT, ORDER BY expressions must appear in select list".into(),
)
.into());
}
aliases.extend(vec![None; extra_order_exprs.len()]);
select_items.extend(extra_order_exprs);

// Plan the FROM clause.
let mut root = match from {
None => self.create_dummy_values(),
Expand Down
9 changes: 7 additions & 2 deletions src/frontend/src/planner/set_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,18 @@
use risingwave_common::error::Result;

use crate::binder::BoundSetExpr;
use crate::expr::ExprImpl;
use crate::optimizer::plan_node::PlanRef;
use crate::planner::Planner;

impl Planner {
pub(super) fn plan_set_expr(&mut self, set_expr: BoundSetExpr) -> Result<PlanRef> {
pub(super) fn plan_set_expr(
&mut self,
set_expr: BoundSetExpr,
extra_order_exprs: Vec<ExprImpl>,
) -> Result<PlanRef> {
match set_expr {
BoundSetExpr::Select(s) => self.plan_select(*s),
BoundSetExpr::Select(s) => self.plan_select(*s, extra_order_exprs),
BoundSetExpr::Values(v) => self.plan_values(*v),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/frontend/test_runner/tests/testdata/nexmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@
WHERE A.id = B.auction and B.dateTime between A.dateTime and A.expires
GROUP BY A.id, A.seller
) AS Q;
planner_error: "Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function"
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- id: nexmark_q7
before:
- create_tables
Expand Down
55 changes: 53 additions & 2 deletions src/frontend/test_runner/tests/testdata/order_by.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,20 @@
- sql: |
create table t (v1 bigint, v2 double precision);
select * from t order by 1+1;
binder_error: 'Feature is not yet implemented: ORDER BY BinaryOp { left: Value(Number("1", false)), op: Plus, right: Value(Number("1", false)) }, Tracking issue: https://github.com/singularity-data/risingwave/issues/1635'
batch_plan: |
BatchProject { exprs: [$0, $1], expr_alias: [v1, v2] }
BatchExchange { order: [$2 ASC], dist: Single }
BatchSort { order: [$2 ASC] }
BatchProject { exprs: [$0, $1, (1:Int32 + 1:Int32)], expr_alias: [v1, v2, ] }
BatchScan { table: t, columns: [v1, v2] }
stream_plan: |
StreamMaterialize { columns: [v1, v2, expr#2(hidden), _row_id#0(hidden)], pk_columns: [_row_id#0], order_descs: [expr#2, _row_id#0] }
StreamProject { exprs: [$0, $1, (1:Int32 + 1:Int32), $2], expr_alias: [v1, v2, , ] }
StreamTableScan { table: t, columns: [v1, v2, _row_id#0], pk_indices: [2] }
- sql: |
create table t (v1 bigint, v2 double precision);
select * from t order by v;
binder_error: 'Item not found: output column "v"'
binder_error: 'Item not found: Invalid column: v'
- sql: |
create table t (v1 bigint, v2 double precision);
select * from t order by v1 desc limit 5;
Expand All @@ -70,3 +79,45 @@
StreamTopN { order: [$0 DESC], limit: 5, offset: 7 }
StreamExchange { dist: Single }
StreamTableScan { table: t, columns: [v1, v2, _row_id#0], pk_indices: [2] }
- sql: |
/* order by expression that would be valid in select list */
create table t (x int, y int, z int);
select x, y from t order by x + y, z;
optimized_logical_plan: |
LogicalProject { exprs: [$0, $1, ($0 + $1), $2], expr_alias: [x, y, , ] }
LogicalScan { table: t, columns: [x, y, z] }
batch_plan: |
BatchProject { exprs: [$0, $1], expr_alias: [x, y] }
BatchExchange { order: [$2 ASC, $3 ASC], dist: Single }
BatchSort { order: [$2 ASC, $3 ASC] }
BatchProject { exprs: [$0, $1, ($0 + $1), $2], expr_alias: [x, y, , ] }
BatchScan { table: t, columns: [x, y, z] }
stream_plan: |
StreamMaterialize { columns: [x, y, expr#2(hidden), z(hidden), _row_id#0(hidden)], pk_columns: [_row_id#0], order_descs: [expr#2, z, _row_id#0] }
StreamProject { exprs: [$0, $1, ($0 + $1), $2, $3], expr_alias: [x, y, , , ] }
StreamTableScan { table: t, columns: [x, y, z, _row_id#0], pk_indices: [3] }
- sql: |
/* order by the number of an output column */
create table t (x int, y int);
select x, y from t order by 2;
optimized_logical_plan: |
LogicalScan { table: t, columns: [x, y] }
batch_plan: |
BatchExchange { order: [$1 ASC], dist: Single }
BatchSort { order: [$1 ASC] }
BatchScan { table: t, columns: [x, y] }
- sql: |
/* index exceeds the number of select items */
create table t (x int, y int);
select x from t order by 2;
binder_error: 'Invalid input syntax: Invalid value in ORDER BY: 2'
- sql: |
/* an output column name cannot be used in an expression */
create table t (x int, y int);
select x + y as sum from t order by sum + 1;
binder_error: 'Item not found: Invalid column: sum'
- sql: |
/* select distinct with order by expressions not appear in select list */
create table t (x int, y int);
select distinct x from t order by y;
planner_error: 'Invalid input syntax: for SELECT DISTINCT, ORDER BY expressions must appear in select list'