Skip to content

Commit

Permalink
Refactor(optimizer): factor out pseudocolumns in qualify columns (#1984)
Browse files (browse the repository at this point in the history)
  • Loading branch information
georgesittas authored Jul 31, 2023
1 parent be7d4e6 commit 9e77c7b
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
4 changes: 4 additions & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ class BigQuery(Dialect):
"TZH": "%z",
}

# The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
# https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

@classmethod
def normalize_identifier(cls, expression: E) -> E:
# In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ class Dialect(metaclass=_Dialect):
# special syntax cast(x as date format 'yyyy') defaults to time_mapping
FORMAT_MAPPING: t.Dict[str, str] = {}

# Columns that are auto-generated by the engine corresponding to this dialect.
# For example, such columns may be excluded from SELECT * queries.
PSEUDOCOLUMNS: t.Set[str] = set()

# Autofilled
tokenizer_class = Tokenizer
parser_class = Parser
Expand Down
18 changes: 8 additions & 10 deletions sqlglot/optimizer/qualify_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def qualify_columns(
"""
schema = ensure_schema(schema)
infer_schema = schema.empty if infer_schema is None else infer_schema
pseudocolumns = Dialect.get_or_raise(schema.dialect).PSEUDOCOLUMNS

for scope in traverse_scope(expression):
resolver = Resolver(scope, schema, infer_schema=infer_schema)
Expand All @@ -55,7 +56,7 @@ def qualify_columns(
_expand_alias_refs(scope, resolver)

if not isinstance(scope.expression, exp.UDTF):
_expand_stars(scope, resolver, using_column_tables)
_expand_stars(scope, resolver, using_column_tables, pseudocolumns)
_qualify_outputs(scope)
_expand_group_by(scope)
_expand_order_by(scope, resolver)
Expand Down Expand Up @@ -326,7 +327,10 @@ def _qualify_columns(scope: Scope, resolver: Resolver) -> None:


def _expand_stars(
scope: Scope, resolver: Resolver, using_column_tables: t.Dict[str, t.Any]
scope: Scope,
resolver: Resolver,
using_column_tables: t.Dict[str, t.Any],
pseudocolumns: t.Set[str],
) -> None:
"""Expand stars to lists of column selections"""

Expand Down Expand Up @@ -367,14 +371,8 @@ def _expand_stars(

columns = resolver.get_source_columns(table, only_visible=True)

# The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
# https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
if resolver.schema.dialect == "bigquery":
columns = [
name
for name in columns
if name.upper() not in ("_PARTITIONTIME", "_PARTITIONDATE")
]
if pseudocolumns:
columns = [name for name in columns if name.upper() not in pseudocolumns]

if columns and "*" not in columns:
if pivot and has_pivoted_source and pivot_columns and pivot_output_columns:
Expand Down

0 comments on commit 9e77c7b

Please sign in to comment.