Skip to content

Commit

Permalink
Fix: spark clustered by dml
Browse files Browse the repository at this point in the history
  • Loading branch information
tobymao committed Jun 27, 2023
1 parent b4cd239 commit b60e19b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 2 deletions.
6 changes: 6 additions & 0 deletions sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ class Tokenizer(tokens.Tokenizer):
"ADD FILES": TokenType.COMMAND,
"ADD JAR": TokenType.COMMAND,
"ADD JARS": TokenType.COMMAND,
"CLUSTERED BY": TokenType.CLUSTER_BY,
"MSCK REPAIR": TokenType.COMMAND,
"WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
}
Expand Down Expand Up @@ -436,3 +437,8 @@ def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
self.sql(expression, "sort"),
self.sql(expression, "cluster"),
]

def cluster_sql(self, expression: exp.Cluster) -> str:
    """Render an exp.Cluster node as SQL.

    When the node hangs off an exp.Properties parent it came from a
    CREATE TABLE clause, so emit Hive/Spark DDL syntax:
    ``CLUSTERED BY (col, ...)``. Otherwise it is a query modifier and
    renders as the usual ``CLUSTER BY`` expression list.
    """
    in_ddl_properties = isinstance(expression.parent, exp.Properties)
    if not in_ddl_properties:
        return self.op_expressions("CLUSTER BY", expression)

    cols = self.expressions(expression, flat=True)
    return f"CLUSTERED BY ({cols})"
12 changes: 10 additions & 2 deletions sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ class Parser(metaclass=_Parser):
"CHARACTER SET": lambda self: self._parse_character_set(),
"CHECKSUM": lambda self: self._parse_checksum(),
"CLUSTER BY": lambda self: self._parse_cluster(),
"CLUSTERED BY": lambda self: self._parse_cluster(dml=True),
"COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
"COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
"COPY": lambda self: self._parse_copy_property(),
Expand Down Expand Up @@ -1426,8 +1427,15 @@ def _parse_checksum(self) -> exp.ChecksumProperty:

return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

def _parse_cluster(self) -> t.Optional[exp.Cluster]:
return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))
def _parse_cluster(self, dml: bool = False) -> t.Optional[exp.Cluster]:
    """Parse a CLUSTER BY / CLUSTERED BY expression list into exp.Cluster.

    Args:
        dml: True when parsing the DDL form ``CLUSTERED BY (col, ...)``,
            whose column list is wrapped in parentheses; False for the
            plain query modifier ``CLUSTER BY col, ...``.

    Returns:
        An exp.Cluster node holding the parsed (ordered) expressions.
    """
    # Both forms parse the same comma-separated list; only the DDL form
    # is parenthesized, so guard the paren matching instead of
    # duplicating the _parse_csv call in each branch.
    if dml:
        self._match_l_paren()

    expressions = self._parse_csv(self._parse_ordered)

    if dml:
        self._match_r_paren()

    return self.expression(exp.Cluster, expressions=expressions)

def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
if not self._match_text_seq("GRANTS"):
Expand Down
1 change: 1 addition & 0 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class TestSpark(Validator):
def test_ddl(self):
self.validate_identity("CREATE TABLE foo (col VARCHAR(50))")
self.validate_identity("CREATE TABLE foo (col STRUCT<struct_col_a: VARCHAR((50))>)")
self.validate_identity("CREATE TABLE foo (col STRING) CLUSTERED BY (col)")

self.validate_all(
"CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)",
Expand Down

0 comments on commit b60e19b

Please sign in to comment.