Skip to content

Commit

Permalink
Refactor(hive): improve transpilation of TO_JSON (#1809)
Browse files (browse the repository at this point in the history)
  • Loading branch information
georgesittas authored Jun 20, 2023
1 parent b8168bc commit 458f12d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
14 changes: 9 additions & 5 deletions sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,17 @@ def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:

def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
this = expression.this
if not this.type:
from sqlglot.optimizer.annotate_types import annotate_types
if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
# Since FROM_JSON requires a nested type, we always wrap the json string with
# an array to ensure that "naked" strings like "'a'" will be handled correctly
wrapped_json = exp.Literal.string(f"[{this.this.name}]")

annotate_types(this)
from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
to_json = self.func("TO_JSON", from_json)

# This strips the [, ] delimiters of the dummy array printed by TO_JSON
return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

if this.type.is_type("json"):
return self.sql(this)
return self.func("TO_JSON", this, expression.args.get("options"))


Expand Down
23 changes: 17 additions & 6 deletions tests/dialects/test_presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,14 +670,25 @@ def test_presto(self):
"spark": "TO_JSON(x)",
},
)

self.validate_all(
"JSON_FORMAT(JSON 'x')",
"""JSON_FORMAT(JSON '"x"')""",
write={
"bigquery": """TO_JSON_STRING(CAST('"x"' AS JSON))""",
"duckdb": """CAST(TO_JSON(CAST('"x"' AS JSON)) AS TEXT)""",
"presto": """JSON_FORMAT(CAST('"x"' AS JSON))""",
"spark": """REGEXP_EXTRACT(TO_JSON(FROM_JSON('["x"]', SCHEMA_OF_JSON('["x"]'))), '^.(.*).$', 1)""",
},
)
self.validate_all(
"""SELECT JSON_FORMAT(JSON '{"a": 1, "b": "c"}')""",
write={
"spark": """SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[{"a": 1, "b": "c"}]', SCHEMA_OF_JSON('[{"a": 1, "b": "c"}]'))), '^.(.*).$', 1)""",
},
)
self.validate_all(
"""SELECT JSON_FORMAT(JSON '[1, 2, 3]')""",
write={
"bigquery": "TO_JSON_STRING(CAST('x' AS JSON))",
"duckdb": "CAST(TO_JSON(CAST('x' AS JSON)) AS TEXT)",
"presto": "JSON_FORMAT(CAST('x' AS JSON))",
"spark": "TO_JSON('x')",
"spark": "SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[[1, 2, 3]]', SCHEMA_OF_JSON('[[1, 2, 3]]'))), '^.(.*).$', 1)",
},
)

Expand Down

0 comments on commit 458f12d

Please sign in to comment.