From eaed790b331d45cfec049bafd5251e4250c663a0 Mon Sep 17 00:00:00 2001 From: tobymao Date: Wed, 28 Jun 2023 15:25:01 -0700 Subject: [PATCH] Fix: workaround bigquery grouped alias with order --- sqlglot/dialects/bigquery.py | 35 +++++++++++++++++++++++++- tests/dialects/test_bigquery.py | 10 ++++++++ tests/fixtures/optimizer/optimizer.sql | 6 ++--- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index b9a90ef21c..8786063492 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -107,6 +107,34 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: return expression +# https://issuetracker.google.com/issues/162294746 +# workaround for bigquery bug when grouping by an expression and then ordering +# WITH x AS (SELECT 1 y) +# SELECT y + 1 z +# FROM x +# GROUP BY x + 1 +# ORDER by z +def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: + if isinstance(expression, exp.Select): + group = expression.args.get("group") + order = expression.args.get("order") + + if group and order: + aliases = { + select.this: select.args["alias"] + for select in expression.selects + if isinstance(select, exp.Alias) + } + + for e in group.expressions: + alias = aliases.get(e) + + if alias: + e.replace(exp.column(alias)) + + return expression + + def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" if isinstance(expression, exp.CTE) and expression.alias_column_names: @@ -360,7 +388,12 @@ class Generator(generator.Generator): ), exp.RegexpLike: rename_func("REGEXP_CONTAINS"), exp.Select: transforms.preprocess( - [transforms.explode_to_unnest, _unqualify_unnest, transforms.eliminate_distinct_on] + [ + transforms.explode_to_unnest, + _unqualify_unnest, + transforms.eliminate_distinct_on, + _alias_ordered_group, + ] ), exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 4021049a0b..b830c0e0aa 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -517,6 +517,16 @@ def test_bigquery(self): }, ) + self.validate_identity( + "SELECT y + 1 z FROM x GROUP BY y + 1 ORDER BY z", + "SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z", + ) + self.validate_identity( + "SELECT y + 1 z FROM x GROUP BY y + 1", + "SELECT y + 1 AS z FROM x GROUP BY y + 1", + ) + self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1") + def test_user_defined_functions(self): self.validate_identity( "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 NOT DETERMINISTIC LANGUAGE js AS 'return x*y;'" diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 83be6f4e65..f71ddde102 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -676,9 +676,9 @@ WHERE `top_terms`.`rank` = 1 AND CAST(`top_terms`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK) GROUP BY - `top_terms`.`refresh_date`, - `top_terms`.`term`, - `top_terms`.`rank` + `day`, + `top_term`, + `rank` ORDER BY `day` DESC;