Fix(clickhouse): map ApproxDistinct to uniq, AnyValue to any (#1635)

* Fix(clickhouse): map ApproxDistinct to uniq, AnyValue to any
* Make Clickhouse -> Bigquery work too
* Fixup
* Fixup
* Use _sql_names to get rid of FUNCTIONS entry for ApproxDistinct
tobymao · May 16, 2023 · bba360c
1 parent 06d6990
commit bba360c
Show file tree

Hide file tree

Showing 7 changed files with 18 additions and 3 deletions.
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
@@ -247,6 +247,7 @@ class Generator(generator.Generator):
 
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,  # type: ignore
+            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArraySize: rename_func("ARRAY_LENGTH"),
             exp.AtTimeZone: lambda self, e: self.func(
                 "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))

diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
@@ -52,8 +52,10 @@ class Tokenizer(tokens.Tokenizer):
     class Parser(parser.Parser):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,  # type: ignore
+            "ANY": exp.AnyValue.from_arg_list,
             "MAP": parse_var_map,
             "MATCH": exp.RegexpLike.from_arg_list,
+            "UNIQ": exp.ApproxDistinct.from_arg_list,
         }
 
         FUNCTION_PARSERS = {
@@ -63,6 +65,9 @@ class Parser(parser.Parser):
 
         FUNCTION_PARSERS.pop("MATCH")
 
+        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
+        NO_PAREN_FUNCTION_PARSERS.pop(TokenType.ANY)
+
         RANGE_PARSERS = {
             **parser.Parser.RANGE_PARSERS,
             TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
@@ -265,6 +270,8 @@ class Generator(generator.Generator):
 
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,  # type: ignore
+            exp.AnyValue: rename_func("any"),
+            exp.ApproxDistinct: rename_func("uniq"),
             exp.Array: inline_array_sql,
             exp.CastToStrType: rename_func("CAST"),
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",

diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
@@ -115,7 +115,6 @@ class Tokenizer(tokens.Tokenizer):
     class Parser(parser.Parser):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,  # type: ignore
-            "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
             "ARRAY_LENGTH": exp.ArraySize.from_arg_list,
             "ARRAY_SORT": exp.SortArray.from_arg_list,
             "ARRAY_REVERSE_SORT": _sort_array_reverse,

diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
@@ -226,7 +226,6 @@ class Parser(parser.Parser):
 
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,  # type: ignore
-            "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
             "BASE64": exp.ToBase64.from_arg_list,
             "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
             "DATE_ADD": lambda args: exp.TsOrDsAdd(

diff --git a/sqlglot/dialects/starrocks.py b/sqlglot/dialects/starrocks.py
@@ -14,7 +14,6 @@ class StarRocks(MySQL):
     class Parser(MySQL.Parser):  # type: ignore
         FUNCTIONS = {
             **MySQL.Parser.FUNCTIONS,
-            "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
             "DATE_TRUNC": lambda args: exp.TimestampTrunc(
                 this=seq_get(args, 1), unit=seq_get(args, 0)
             ),

diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py
@@ -3566,6 +3566,7 @@ class Hll(AggFunc):
 
 class ApproxDistinct(AggFunc):
     arg_types = {"this": True, "accuracy": False}
+    _sql_names = ["APPROX_DISTINCT", "APPROX_COUNT_DISTINCT"]
 
 
 class Array(Func):

diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
@@ -37,6 +37,15 @@ def test_clickhouse(self):
             "CREATE TABLE test (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()"
         )
 
+        self.validate_all(
+            "SELECT uniq(x) FROM (SELECT any(y) AS x FROM (SELECT 1 AS y))",
+            read={
+                "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) x FROM (SELECT 1 y))",
+            },
+            write={
+                "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) AS x FROM (SELECT 1 AS y))",
+            },
+        )
         self.validate_all(
             "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
             write={