Skip to content

Commit

Permalink
feat: simplify COALESCE (#2019)
Browse files Browse the repository at this point in the history
* feat: simplify COALESCE

* fixup

* cleanup comment

* fixup

* fixup
  • Loading branch information
barakalon authored Aug 10, 2023
1 parent c3fd695 commit 6e2705e
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 3 deletions.
62 changes: 62 additions & 0 deletions sqlglot/optimizer/simplify.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def _simplify(expression, root=True):
node.parent = expression.parent
node = simplify_literals(node, root)
node = simplify_parens(node)
node = simplify_coalesce(node)
if root:
expression.replace(node)
return node
Expand Down Expand Up @@ -184,6 +185,7 @@ def _simplify_connectors(expression, left, right):
*GT_GTE,
exp.EQ,
exp.NEQ,
exp.Is,
)

INVERSE_COMPARISONS = {
Expand Down Expand Up @@ -430,6 +432,66 @@ def simplify_parens(expression):
return expression


CONSTANTS = (
exp.Literal,
exp.Boolean,
exp.Null,
)


def simplify_coalesce(expression):
# COALESCE(x) -> x
if (
isinstance(expression, exp.Coalesce)
and not expression.expressions
# COALESCE is also used as a Spark partitioning hint
and not isinstance(expression.parent, exp.Hint)
):
return expression.this

if not isinstance(expression, COMPARISONS):
return expression

if isinstance(expression.left, exp.Coalesce):
coalesce = expression.left
other = expression.right
elif isinstance(expression.right, exp.Coalesce):
coalesce = expression.right
other = expression.left
else:
return expression

# This transformation is valid for non-constants,
# but it really only does anything if they are both constants.
if not isinstance(other, CONSTANTS):
return expression

# Find the first constant arg
for arg_index, arg in enumerate(coalesce.expressions):
if isinstance(arg, CONSTANTS):
break
else:
return expression

coalesce.set("expressions", coalesce.expressions[:arg_index])

# Remove the COALESCE function. This is an optimization, skipping a simplify iteration,
# since we already remove COALESCE at the top of this function.
coalesce = coalesce if coalesce.expressions else coalesce.this

# This expression is more complex than when we started, but it will get simplified further
return exp.or_(
exp.and_(
coalesce.is_(exp.null()).not_(),
expression,
),
exp.and_(
coalesce.is_(exp.null()),
type(expression)(this=arg.copy(), expression=other.copy()),
),
)


def remove_where_true(expression):
for where in expression.find_all(exp.Where):
if always_true(where.this):
Expand Down
33 changes: 33 additions & 0 deletions tests/fixtures/optimizer/simplify.sql
Original file line number Diff line number Diff line change
Expand Up @@ -602,3 +602,36 @@ TRUE;

x = 2018 OR x <> 2018;
x <> 2018 OR x = 2018;

--------------------------------------
-- Coalesce
--------------------------------------
COALESCE(x);
x;

COALESCE(x, 1) = 2;
x = 2 AND NOT x IS NULL;

2 = COALESCE(x, 1);
2 = x AND NOT x IS NULL;

COALESCE(x, 1, 1) = 1 + 1;
x = 2 AND NOT x IS NULL;

COALESCE(x, 1, 2) = 2;
x = 2 AND NOT x IS NULL;

COALESCE(x, 3) <= 2;
x <= 2 AND NOT x IS NULL;

COALESCE(x, 1) <> 2;
x <> 2 OR x IS NULL;

COALESCE(x, 1) <= 2;
x <= 2 OR x IS NULL;

COALESCE(x, 1) = 1;
x = 1 OR x IS NULL;

COALESCE(x, 1) IS NULL;
FALSE;
8 changes: 5 additions & 3 deletions tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10833,9 +10833,11 @@ LEFT JOIN "ws"
AND "ws"."ws_item_sk" = "ss"."ss_item_sk"
AND "ws"."ws_sold_year" = "ss"."ss_sold_year"
WHERE
"ss"."ss_sold_year" = 1999
AND COALESCE("cs"."cs_qty", 0) > 0
AND COALESCE("ws"."ws_qty", 0) > 0
"cs"."cs_qty" > 0
AND "ss"."ss_sold_year" = 1999
AND "ws"."ws_qty" > 0
AND NOT "cs"."cs_qty" IS NULL
AND NOT "ws"."ws_qty" IS NULL
ORDER BY
"ss_item_sk",
"ss"."ss_qty" DESC,
Expand Down

0 comments on commit 6e2705e

Please sign in to comment.