Skip to content

Commit

Permalink
Refactor(optimizer): improve handling of DDL optimization (#1972)
Browse files Browse the repository at this point in the history
* Rebase off gtoonstra's commit

* Fixups

* Scope fixups, tests for insert

* Test fixup

* Test fixup
  • Loading branch information
georgesittas authored Jul 28, 2023
1 parent 9ac4550 commit 8a44cc2
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 9 deletions.
25 changes: 23 additions & 2 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,28 @@ class Uncache(Expression):
arg_types = {"this": True, "exists": False}


class Create(Expression):
class DDL(Expression):
    """Common base for statement nodes that can carry a `with` arg and wrap a query.

    `Create` and `Insert` derive from this class. The properties below expose
    the CTEs attached to the statement and forward select information from the
    wrapped `expression` when that expression is a `Subqueryable`.
    """

    @property
    def ctes(self):
        """Return the expressions of the `with` arg, or an empty list if it is missing/falsy."""
        with_clause = self.args.get("with")
        return with_clause.expressions if with_clause else []

    @property
    def named_selects(self) -> t.List[str]:
        """Return the wrapped query's `named_selects`, or `[]` if it is not a `Subqueryable`."""
        return self.expression.named_selects if isinstance(self.expression, Subqueryable) else []

    @property
    def selects(self) -> t.List[Expression]:
        """Return the wrapped query's `selects`, or `[]` if it is not a `Subqueryable`."""
        return self.expression.selects if isinstance(self.expression, Subqueryable) else []


class Create(DDL):
arg_types = {
"with": False,
"this": True,
Expand Down Expand Up @@ -1543,7 +1564,7 @@ class Index(Expression):
}


class Insert(Expression):
class Insert(DDL):
arg_types = {
"with": False,
"this": True,
Expand Down
31 changes: 24 additions & 7 deletions sqlglot/optimizer/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,9 +512,12 @@ def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
Returns:
list[Scope]: scope instances
"""
if not isinstance(expression, exp.Unionable):
return []
return list(_traverse_scope(Scope(expression)))
if isinstance(expression, exp.Unionable) or (
isinstance(expression, exp.DDL) and isinstance(expression.expression, exp.Subqueryable)
):
return list(_traverse_scope(Scope(expression)))

return []


def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
Expand Down Expand Up @@ -543,6 +546,8 @@ def _traverse_scope(scope):
yield from _traverse_tables(scope)
elif isinstance(scope.expression, exp.UDTF):
yield from _traverse_udtfs(scope)
elif isinstance(scope.expression, exp.DDL):
yield from _traverse_ddl(scope)
else:
logger.warning(
"Cannot traverse scope %s with type '%s'", scope.expression, type(scope.expression)
Expand Down Expand Up @@ -579,10 +584,10 @@ def _traverse_ctes(scope):
for cte in scope.ctes:
recursive_scope = None

# if the scope is a recursive cte, it must be in the form of
# base_case UNION recursive. thus the recursive scope is the first
# section of the union.
if scope.expression.args["with"].recursive:
# if the scope is a recursive cte, it must be in the form of base_case UNION recursive.
# thus the recursive scope is the first section of the union.
with_ = scope.expression.args.get("with")
if with_ and with_.recursive:
union = cte.this

if isinstance(union, exp.Union):
Expand Down Expand Up @@ -742,6 +747,18 @@ def _traverse_udtfs(scope):
scope.sources.update(sources)


def _traverse_ddl(scope):
    """Yield the scopes found under a DDL statement.

    First yields everything produced by traversing the statement's CTEs, then
    branches a child scope for the statement's inner `expression` and yields
    the scopes produced by traversing that child.

    Args:
        scope: the scope whose `expression` is the DDL node being traversed.

    Yields:
        The scopes produced by `_traverse_ctes` followed by those produced by
        `_traverse_scope` on the inner query's scope.
    """
    # CTEs are traversed first; the branch below reads `scope.sources` and
    # `scope.ctes` only after this generator has been exhausted.
    yield from _traverse_ctes(scope)

    inner_query = scope.expression.expression
    inner_scope = scope.branch(
        inner_query,
        scope_type=ScopeType.DERIVED_TABLE,
        chain_sources=scope.sources,
    )
    inner_scope._collect()

    # Put the parent's CTEs ahead of the child's own; presumably this lets the
    # inner query resolve CTEs declared on the DDL statement itself.
    inner_scope._ctes = scope.ctes + inner_scope._ctes

    yield from _traverse_scope(inner_scope)


def walk_in_scope(expression, bfs=True):
"""
Returns a generator object which visits all nodes in the syntax tree, stopping at
Expand Down
35 changes: 35 additions & 0 deletions tests/fixtures/optimizer/qualify_columns_ddl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# title: Create with CTE
WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM cte;
WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT cte.b AS b FROM cte;

# title: Create without CTE
CREATE TABLE foo AS SELECT a FROM tbl;
CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl;

# title: Create with complex CTE with derived table
WITH cte AS (SELECT a FROM (SELECT a from x)) CREATE TABLE s AS SELECT * FROM cte;
WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) CREATE TABLE s AS SELECT cte.a AS a FROM cte;

# title: Create with multiple CTEs
WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2;
WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1) CREATE TABLE s AS SELECT cte2.b AS b FROM cte2;

# title: Create with multiple CTEs, selecting only from the first CTE (unnecessary code)
WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte1;
WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1) CREATE TABLE s AS SELECT cte1.b AS b FROM cte1;

# title: Create with multiple derived tables
CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y));
CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _q_0) AS _q_1;

# title: Create with a CTE and a derived table
WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte));
WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte) AS _q_0) AS _q_1;

# title: Insert with CTE
WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte;
WITH cte AS (SELECT y.b AS b FROM y AS y) INSERT INTO s SELECT cte.b AS b FROM cte;

# title: Insert without CTE
INSERT INTO foo SELECT a FROM tbl;
INSERT INTO foo SELECT tbl.a AS a FROM tbl AS tbl;
1 change: 1 addition & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def test_qualify_columns(self, logger):
self.check_file(
"qualify_columns", qualify_columns, execute=True, schema=self.schema, set_dialect=True
)
self.check_file("qualify_columns_ddl", qualify_columns, schema=self.schema)

def test_qualify_columns__with_invisible(self):
schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}})
Expand Down

0 comments on commit 8a44cc2

Please sign in to comment.