Skip to content

Commit

Permalink
Fix(clickhouse): map ApproxDistinct to uniq, AnyValue to any (#1635)
Browse files (browse the repository at this point in the history)
* Fix(clickhouse): map ApproxDistinct to uniq, AnyValue to any

* Make Clickhouse -> Bigquery work too

* Fixup

* Fixup

* Use _sql_names to get rid of FUNCTIONS entry for ApproxDistinct
  • Loading branch information
georgesittas authored May 16, 2023
1 parent 06d6990 commit bba360c
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 3 deletions.
1 change: 1 addition & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ class Generator(generator.Generator):

TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.ArraySize: rename_func("ARRAY_LENGTH"),
exp.AtTimeZone: lambda self, e: self.func(
"TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
Expand Down
7 changes: 7 additions & 0 deletions sqlglot/dialects/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ class Tokenizer(tokens.Tokenizer):
class Parser(parser.Parser):
FUNCTIONS = {
**parser.Parser.FUNCTIONS, # type: ignore
"ANY": exp.AnyValue.from_arg_list,
"MAP": parse_var_map,
"MATCH": exp.RegexpLike.from_arg_list,
"UNIQ": exp.ApproxDistinct.from_arg_list,
}

FUNCTION_PARSERS = {
Expand All @@ -63,6 +65,9 @@ class Parser(parser.Parser):

FUNCTION_PARSERS.pop("MATCH")

NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
NO_PAREN_FUNCTION_PARSERS.pop(TokenType.ANY)

RANGE_PARSERS = {
**parser.Parser.RANGE_PARSERS,
TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
Expand Down Expand Up @@ -265,6 +270,8 @@ class Generator(generator.Generator):

TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore
exp.AnyValue: rename_func("any"),
exp.ApproxDistinct: rename_func("uniq"),
exp.Array: inline_array_sql,
exp.CastToStrType: rename_func("CAST"),
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
Expand Down
1 change: 0 additions & 1 deletion sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ class Tokenizer(tokens.Tokenizer):
class Parser(parser.Parser):
FUNCTIONS = {
**parser.Parser.FUNCTIONS, # type: ignore
"APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
"ARRAY_LENGTH": exp.ArraySize.from_arg_list,
"ARRAY_SORT": exp.SortArray.from_arg_list,
"ARRAY_REVERSE_SORT": _sort_array_reverse,
Expand Down
1 change: 0 additions & 1 deletion sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ class Parser(parser.Parser):

FUNCTIONS = {
**parser.Parser.FUNCTIONS, # type: ignore
"APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
"BASE64": exp.ToBase64.from_arg_list,
"COLLECT_LIST": exp.ArrayAgg.from_arg_list,
"DATE_ADD": lambda args: exp.TsOrDsAdd(
Expand Down
1 change: 0 additions & 1 deletion sqlglot/dialects/starrocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class StarRocks(MySQL):
class Parser(MySQL.Parser): # type: ignore
FUNCTIONS = {
**MySQL.Parser.FUNCTIONS,
"APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
"DATE_TRUNC": lambda args: exp.TimestampTrunc(
this=seq_get(args, 1), unit=seq_get(args, 0)
),
Expand Down
1 change: 1 addition & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3566,6 +3566,7 @@ class Hll(AggFunc):

# Aggregate-function expression node for an approximate distinct count.
# `arg_types` declares the accepted arguments: "this" and "accuracy"
# (presumably True marks a required argument and False an optional one -- TODO confirm).
# `_sql_names` lists both APPROX_DISTINCT and APPROX_COUNT_DISTINCT as SQL names for
# this node; per the commit message, this replaces per-dialect FUNCTIONS entries.
class ApproxDistinct(AggFunc):
arg_types = {"this": True, "accuracy": False}
_sql_names = ["APPROX_DISTINCT", "APPROX_COUNT_DISTINCT"]


class Array(Func):
Expand Down
9 changes: 9 additions & 0 deletions tests/dialects/test_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ def test_clickhouse(self):
"CREATE TABLE test (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()"
)

self.validate_all(
"SELECT uniq(x) FROM (SELECT any(y) AS x FROM (SELECT 1 AS y))",
read={
"bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) x FROM (SELECT 1 y))",
},
write={
"bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) AS x FROM (SELECT 1 AS y))",
},
)
self.validate_all(
"SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
write={
Expand Down

0 comments on commit bba360c

Please sign in to comment.