Skip to content

Commit

Permalink
Add support for Snowflake column aliases that use SQL keywords (#1632)
Browse files Browse the repository at this point in the history
  • Loading branch information
yoavcloud authored Jan 16, 2025
1 parent 4741500 commit b4b5576
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 56 deletions.
14 changes: 14 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any {
fn supports_set_stmt_without_operator(&self) -> bool {
false
}

/// Decides whether `kw` may be parsed as the alias of a select list item.
///
/// `explicit` is true when the keyword was preceded by an `AS` word, in which
/// case it is always accepted. Otherwise the keyword is accepted only if it is
/// not listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`.
///
/// The parser is passed in so that dialect overrides can look ahead if needed;
/// this default implementation does not use it.
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }
    !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
}

/// Decides whether `kw` may be parsed as the alias of a table factor.
///
/// `explicit` is true when the keyword was preceded by an `AS` word, in which
/// case it is always accepted. Otherwise the keyword is accepted only if it is
/// not listed in `keywords::RESERVED_FOR_TABLE_ALIAS`.
///
/// The parser is passed in so that dialect overrides can look ahead if needed;
/// this default implementation does not use it.
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }
    !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
}
}

/// This represents the operators for which precedence must be defined
Expand Down
45 changes: 45 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect {
fn supports_partiql(&self) -> bool {
true
}

/// Snowflake-specific decision on whether `kw` is a select item alias.
///
/// A keyword preceded by `AS` (`explicit == true`) is always an alias.
/// Otherwise the answer depends on the keyword itself and, for a few
/// keywords, on the token that follows it.
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }

    match kw {
        // These keywords are an alias only when nothing follows that could
        // change their meaning, i.e. when the next token is a comma or the
        // end of input. Counter-examples where they are NOT aliases:
        //   `SELECT * EXCEPT (col1) FROM tbl`
        //   `SELECT 1 LIMIT 5`
        //   `SELECT 1 OFFSET 5 ROWS`
        //   `INSERT INTO t SELECT 1 RETURNING *`
        Keyword::EXCEPT | Keyword::LIMIT | Keyword::OFFSET | Keyword::RETURNING => {
            matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF)
        }

        // `FETCH` is an alias unless it is followed by `FIRST` or `NEXT`,
        // which would give it a different meaning, for example
        // `SELECT 1 FETCH FIRST 10 ROWS` - not an alias.
        Keyword::FETCH => {
            !(parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT))
        }

        // Keywords reserved by the Snowflake dialect, which seems to be less
        // restrictive than `keywords::RESERVED_FOR_COLUMN_ALIAS`. These were
        // tested with the statement `SELECT 1 <KW>`.
        Keyword::FROM
        | Keyword::GROUP
        | Keyword::HAVING
        | Keyword::INTERSECT
        | Keyword::INTO
        | Keyword::MINUS
        | Keyword::ORDER
        | Keyword::SELECT
        | Keyword::UNION
        | Keyword::WHERE
        | Keyword::WITH => false,

        // Every other word is accepted as an alias.
        _ => true,
    }
}
}

fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {
Expand Down
131 changes: 75 additions & 56 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8838,38 +8838,76 @@ impl<'a> Parser<'a> {
Ok(IdentWithAlias { ident, alias })
}

/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
/// Optionally parses the alias of a select list item.
///
/// Whether a keyword is accepted as an alias is decided by the dialect's
/// `is_select_item_alias`; no fixed list of reserved keywords is supplied.
fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> {
    // Forwards the keyword-vs-alias decision to the active dialect.
    fn dialect_accepts(is_explicit: bool, keyword: &Keyword, parser: &mut Parser) -> bool {
        parser
            .dialect
            .is_select_item_alias(is_explicit, keyword, parser)
    }

    self.parse_optional_alias_inner(None, dialect_accepts)
}

/// Optionally parses a table alias, as in
/// `... FROM generate_series(1, 10) AS t (col)`.
///
/// Besides naming the table itself, such an alias may optionally name the
/// table's columns. Returns `Ok(None)` when no alias name is parsed.
/// Whether a keyword is accepted as an alias is decided by the dialect's
/// `is_table_factor_alias`.
pub fn maybe_parse_table_alias(&mut self) -> Result<Option<TableAlias>, ParserError> {
    // Forwards the keyword-vs-alias decision to the active dialect.
    fn dialect_accepts(is_explicit: bool, keyword: &Keyword, parser: &mut Parser) -> bool {
        parser
            .dialect
            .is_table_factor_alias(is_explicit, keyword, parser)
    }

    let Some(name) = self.parse_optional_alias_inner(None, dialect_accepts)? else {
        return Ok(None);
    };
    // The alias name was found; column definitions may follow it.
    let columns = self.parse_table_alias_column_defs()?;
    Ok(Some(TableAlias { name, columns }))
}

/// Backwards-compatible wrapper around `parse_optional_alias_inner`.
///
/// The caller supplies the list of reserved keywords directly. New flows
/// should prefer the context-specific methods such as
/// `maybe_parse_select_item_alias` and `maybe_parse_table_alias`.
pub fn parse_optional_alias(
    &mut self,
    reserved_kwds: &[Keyword],
) -> Result<Option<Ident>, ParserError> {
    // The callback never accepts a keyword, so it adds nothing on top of
    // the `reserved_kwds` list handed to the inner parser.
    fn never_accept(_is_explicit: bool, _keyword: &Keyword, _parser: &mut Parser) -> bool {
        false
    }

    self.parse_optional_alias_inner(Some(reserved_kwds), never_accept)
}

/// Parses an optional alias after a SQL element such as a select list item
/// or a table name.
///
/// This method accepts an optional list of reserved keywords or a function
/// to call to validate if a keyword should be parsed as an alias, to allow
/// callers to customize the parsing logic based on their context.
fn parse_optional_alias_inner<F>(
&mut self,
reserved_kwds: Option<&[Keyword]>,
validator: F,
) -> Result<Option<Ident>, ParserError>
where
F: Fn(bool, &Keyword, &mut Parser) -> bool,
{
let after_as = self.parse_keyword(Keyword::AS);

let next_token = self.next_token();
match next_token.token {
// Accept any identifier after `AS` (though many dialects have restrictions on
// keywords that may appear here). If there's no `AS`: don't parse keywords,
// which may start a construct allowed in this position, to be parsed as aliases.
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
// By default, if a word is located after the `AS` keyword we consider it an alias
// as long as it's not reserved.
Token::Word(w)
if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) =>
{
Ok(Some(w.into_ident(next_token.span)))
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
//
// Note, that this conflicts with an obscure rule from the SQL
// standard, which we don't implement:
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
// "[Obscure Rule] SQL allows you to break a long <character
// string literal> up into two or more smaller <character string
// literal>s, split by a <separator> that includes a newline
// character. When it sees such a <literal>, your DBMS will
// ignore the <separator> and treat the multiple strings as
// a single <literal>."
// This pattern allows for customizing the acceptance of words as aliases based on the caller's
// context, such as to what SQL element this word is a potential alias of (select item alias, table name
// alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords.
Token::Word(w) if validator(after_as, &w.keyword, self) => {
Ok(Some(w.into_ident(next_token.span)))
}
// For backwards-compatibility, we accept quoted strings as aliases regardless of the context.
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
// Support for MySql dialect double-quoted string, `AS "HOUR"` for example
Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
_ => {
if after_as {
Expand All @@ -8881,23 +8919,6 @@ impl<'a> Parser<'a> {
}
}

/// Optionally parses the alias of a table-valued object, as in
/// `... FROM generate_series(1, 10) AS t (col)`.
///
/// Besides naming the table itself, such an alias may optionally name the
/// table's columns. `reserved_kwds` is passed through to
/// `parse_optional_alias`. Returns `Ok(None)` when no alias name is parsed.
pub fn parse_optional_table_alias(
    &mut self,
    reserved_kwds: &[Keyword],
) -> Result<Option<TableAlias>, ParserError> {
    if let Some(name) = self.parse_optional_alias(reserved_kwds)? {
        // The alias name was found; column definitions may follow it.
        let columns = self.parse_table_alias_column_defs()?;
        return Ok(Some(TableAlias { name, columns }));
    }
    Ok(None)
}

pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
let expressions = if self.parse_keyword(Keyword::ALL) {
Expand Down Expand Up @@ -10899,7 +10920,7 @@ impl<'a> Parser<'a> {
let name = self.parse_object_name(false)?;
self.expect_token(&Token::LParen)?;
let args = self.parse_optional_args()?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Function {
lateral: true,
name,
Expand All @@ -10912,7 +10933,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::TableFunction { expr, alias })
} else if self.consume_token(&Token::LParen) {
// A left paren introduces either a derived table (i.e., a subquery)
Expand Down Expand Up @@ -10961,7 +10982,7 @@ impl<'a> Parser<'a> {
#[allow(clippy::if_same_then_else)]
if !table_and_joins.joins.is_empty() {
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::NestedJoin {
table_with_joins: Box::new(table_and_joins),
alias,
Expand All @@ -10974,7 +10995,7 @@ impl<'a> Parser<'a> {
// (B): `table_and_joins` (what we found inside the parentheses)
// is a nested join `(foo JOIN bar)`, not followed by other joins.
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::NestedJoin {
table_with_joins: Box::new(table_and_joins),
alias,
Expand All @@ -10988,9 +11009,7 @@ impl<'a> Parser<'a> {
// [AS alias])`) as well.
self.expect_token(&Token::RParen)?;

if let Some(outer_alias) =
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
{
if let Some(outer_alias) = self.maybe_parse_table_alias()? {
// Snowflake also allows specifying an alias *after* parens
// e.g. `FROM (mytable) AS alias`
match &mut table_and_joins.relation {
Expand Down Expand Up @@ -11043,7 +11062,7 @@ impl<'a> Parser<'a> {
// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)
// where there are no parentheses around the VALUES clause.
let values = SetExpr::Values(self.parse_values(false)?);
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Derived {
lateral: false,
subquery: Box::new(Query {
Expand All @@ -11069,7 +11088,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::RParen)?;

let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]);
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) {
let alias = match self.maybe_parse_table_alias() {
Ok(Some(alias)) => Some(alias),
Ok(None) => None,
Err(e) => return Err(e),
Expand Down Expand Up @@ -11106,7 +11125,7 @@ impl<'a> Parser<'a> {
let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?;
self.expect_token(&Token::RParen)?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::JsonTable {
json_expr,
json_path,
Expand Down Expand Up @@ -11151,7 +11170,7 @@ impl<'a> Parser<'a> {
}
}

let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;

// MSSQL-specific table hints:
let mut with_hints = vec![];
Expand Down Expand Up @@ -11329,7 +11348,7 @@ impl<'a> Parser<'a> {
} else {
Vec::new()
};
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::OpenJsonTable {
json_expr,
json_path,
Expand Down Expand Up @@ -11428,7 +11447,7 @@ impl<'a> Parser<'a> {

self.expect_token(&Token::RParen)?;

let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;

Ok(TableFactor::MatchRecognize {
table: Box::new(table),
Expand Down Expand Up @@ -11672,7 +11691,7 @@ impl<'a> Parser<'a> {
) -> Result<TableFactor, ParserError> {
let subquery = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Derived {
lateral: match lateral {
Lateral => true,
Expand Down Expand Up @@ -11766,7 +11785,7 @@ impl<'a> Parser<'a> {
};

self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Pivot {
table: Box::new(table),
aggregate_functions,
Expand All @@ -11788,7 +11807,7 @@ impl<'a> Parser<'a> {
self.expect_keyword_is(Keyword::IN)?;
let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Unpivot {
table: Box::new(table),
value,
Expand Down Expand Up @@ -12614,7 +12633,7 @@ impl<'a> Parser<'a> {
})
}
expr => self
.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
.maybe_parse_select_item_alias()
.map(|alias| match alias {
Some(alias) => SelectItem::ExprWithAlias { expr, alias },
None => SelectItem::UnnamedExpr(expr),
Expand Down
29 changes: 29 additions & 0 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3022,3 +3022,32 @@ fn parse_ls_and_rm() {

snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#);
}

#[test]
fn test_sql_keywords_as_select_item_aliases() {
    // Keywords that Snowflake accepts as an implicit select item alias:
    // `SELECT 1 <KW>` must round-trip as `SELECT 1 AS <KW>`.
    for kw in ["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"] {
        let input = format!("SELECT 1 {kw}");
        let canonical = format!("SELECT 1 AS {kw}");
        snowflake().one_statement_parses_to(&input, &canonical);
    }

    // Keywords that must not be taken as an alias: `SELECT 1 <KW>` has to
    // be rejected by the parser.
    for kw in [
        "FROM",
        "GROUP",
        "HAVING",
        "INTERSECT",
        "INTO",
        "ORDER",
        "SELECT",
        "UNION",
        "WHERE",
        "WITH",
    ] {
        let input = format!("SELECT 1 {kw}");
        let outcome = snowflake().parse_sql_statements(&input);
        assert!(outcome.is_err());
    }
}

0 comments on commit b4b5576

Please sign in to comment.