From 9a62d9463005631f2d43b57a0d782453db98931e Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 6 Jan 2017 12:24:07 -0800 Subject: [PATCH] [sqllab] bugfix visualizing a query with a semi-colon (#1869) * [sqllab] bugfix visualizing a query with a semi-colon * Fixing tests --- superset/sql_lab.py | 32 +++++--------------------------- superset/sql_parse.py | 33 ++++++++++++++++++++++++++++++++- superset/views.py | 4 +++- tests/celery_tests.py | 40 ++++++++++++++++++---------------------- 4 files changed, 58 insertions(+), 51 deletions(-) diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 6eb251e3b3d51..94c8b3b46ddcb 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -11,7 +11,8 @@ from sqlalchemy.orm import sessionmaker from superset import ( - app, db, models, utils, dataframe, results_backend, sql_parse) + app, db, models, utils, dataframe, results_backend) +from superset.sql_parse import SupersetQuery from superset.db_engine_specs import LimitMethod from superset.jinja_context import get_template_processor QueryStatus = models.QueryStatus @@ -40,28 +41,6 @@ def dedup(l, suffix='__'): return new_l -def create_table_as(sql, table_name, override=False): - """Reformats the query into the create table as query. - - Works only for the single select SQL statements, in all other cases - the sql query is not modified. - :param superset_query: string, sql query that will be executed - :param table_name: string, will contain the results of the query execution - :param override, boolean, table table_name will be dropped if true - :return: string, create table as query - """ - # TODO(bkyryliuk): enforce that all the columns have names. Presto requires it - # for the CTA operation. - # TODO(bkyryliuk): drop table if allowed, check the namespace and - # the permissions. - # TODO raise if multi-statement - exec_sql = '' - if override: - exec_sql = 'DROP TABLE IF EXISTS {table_name};\n' - exec_sql += "CREATE TABLE {table_name} AS \n{sql}" - return exec_sql.format(**locals()) - - @celery_app.task(bind=True) def get_sql_results(self, query_id, return_results=True, store_results=False): """Executes the sql query returns the results.""" @@ -76,7 +55,6 @@ def get_sql_results(self, query_id, return_results=True, store_results=False): session.commit() # HACK query = session.query(models.Query).filter_by(id=query_id).one() database = query.database - executed_sql = query.sql.strip().strip(';') db_engine_spec = database.db_engine_spec def handle_error(msg): @@ -91,7 +69,8 @@ def handle_error(msg): handle_error("Results backend isn't configured.") # Limit enforced only for retrieving the data, not for the CTA queries. - superset_query = sql_parse.SupersetQuery(executed_sql) + superset_query = SupersetQuery(query.sql) + executed_sql = superset_query.stripped() if not superset_query.is_select() and not database.allow_dml: handle_error( "Only `SELECT` statements are allowed against this database") @@ -105,8 +84,7 @@ def handle_error(msg): query.tmp_table_name = 'tmp_{}_table_{}'.format( query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S')) - executed_sql = create_table_as( - executed_sql, query.tmp_table_name) + executed_sql = superset_query.as_create_table(query.tmp_table_name) query.select_as_cta_used = True elif ( query.limit and superset_query.is_select() and diff --git a/superset/sql_parse.py b/superset/sql_parse.py index 8f2c6e018b8e1..0b28f23e6e468 100644 --- a/superset/sql_parse.py +++ b/superset/sql_parse.py @@ -14,7 +14,8 @@ def __init__(self, sql_statement): self._table_names = set() self._alias_names = set() # TODO: multistatement support - for statement in sqlparse.parse(self.sql): + self._parsed = sqlparse.parse(self.sql) + for statement in self._parsed: self.__extract_from_token(statement) self._table_names = self._table_names - self._alias_names @@ -26,6 +27,13 @@ def tables(self): def is_select(self): return self.sql.upper().startswith('SELECT') + def stripped(self): + sql = self.sql + if sql: + while sql[-1] in (' ', ';', '\n', '\t'): + sql = sql[:-1] + return sql + @staticmethod def __precedes_table_name(token_value): for keyword in PRECEDES_TABLE_NAME: @@ -67,6 +75,29 @@ def __process_identifier(self, identifier): self._alias_names.add(identifier.tokens[0].value) self.__extract_from_token(identifier) + def as_create_table(self, table_name, overwrite=False): + """Reformats the query into the create table as query. + + Works only for the single select SQL statements, in all other cases + the sql query is not modified. + :param superset_query: string, sql query that will be executed + :param table_name: string, will contain the results of the + query execution + :param overwrite, boolean, table table_name will be dropped if true + :return: string, create table as query + """ + # TODO(bkyryliuk): enforce that all the columns have names. + # Presto requires it for the CTA operation. + # TODO(bkyryliuk): drop table if allowed, check the namespace and + # the permissions. + # TODO raise if multi-statement + exec_sql = '' + sql = self.stripped() + if overwrite: + exec_sql = 'DROP TABLE IF EXISTS {table_name};\n' + exec_sql += "CREATE TABLE {table_name} AS \n{sql}" + return exec_sql.format(**locals()) + def __extract_from_token(self, token): if not hasattr(token, 'tokens'): return diff --git a/superset/views.py b/superset/views.py index f290ef11a05e0..87bcad686c2e0 100755 --- a/superset/views.py +++ b/superset/views.py @@ -41,6 +41,7 @@ ) from superset.source_registry import SourceRegistry from superset.models import DatasourceAccessRequest as DAR +from superset.sql_parse import SupersetQuery config = app.config log_this = models.Log.log_this @@ -2229,7 +2230,8 @@ def sqllab_viz(self): if not table: table = models.SqlaTable(table_name=table_name) table.database_id = data.get('dbId') - table.sql = data.get('sql') + q = SupersetQuery(data.get('sql')) + table.sql = q.stripped() db.session.add(table) cols = [] dims = [] diff --git a/tests/celery_tests.py b/tests/celery_tests.py index af58afef6645e..f172e6b1fe544 100644 --- a/tests/celery_tests.py +++ b/tests/celery_tests.py @@ -12,8 +12,9 @@ import pandas as pd -from superset import app, appbuilder, cli, db, models, sql_lab, dataframe +from superset import app, appbuilder, cli, db, models, dataframe from superset.security import sync_role_definitions +from superset.sql_parse import SupersetQuery from .base_tests import SupersetTestCase @@ -35,38 +36,33 @@ class UtilityFunctionTests(SupersetTestCase): # TODO(bkyryliuk): support more cases in CTA function. def test_create_table_as(self): - select_query = "SELECT * FROM outer_space;" - updated_select_query = sql_lab.create_table_as( - select_query, "tmp") + q = SupersetQuery("SELECT * FROM outer_space;") + self.assertEqual( - "CREATE TABLE tmp AS \nSELECT * FROM outer_space;", - updated_select_query) + "CREATE TABLE tmp AS \nSELECT * FROM outer_space", + q.as_create_table("tmp")) - updated_select_query_with_drop = sql_lab.create_table_as( - select_query, "tmp", override=True) self.assertEqual( "DROP TABLE IF EXISTS tmp;\n" - "CREATE TABLE tmp AS \nSELECT * FROM outer_space;", - updated_select_query_with_drop) + "CREATE TABLE tmp AS \nSELECT * FROM outer_space", + q.as_create_table("tmp", overwrite=True)) - select_query_no_semicolon = "SELECT * FROM outer_space" - updated_select_query_no_semicolon = sql_lab.create_table_as( - select_query_no_semicolon, "tmp") + # now without a semicolon + q = SupersetQuery("SELECT * FROM outer_space") self.assertEqual( "CREATE TABLE tmp AS \nSELECT * FROM outer_space", - updated_select_query_no_semicolon) + q.as_create_table("tmp")) + # now a multi-line query multi_line_query = ( "SELECT * FROM planets WHERE\n" - "Luke_Father = 'Darth Vader';") - updated_multi_line_query = sql_lab.create_table_as( - multi_line_query, "tmp") - expected_updated_multi_line_query = ( - "CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n" - "Luke_Father = 'Darth Vader';") + "Luke_Father = 'Darth Vader'") + q = SupersetQuery(multi_line_query) self.assertEqual( - expected_updated_multi_line_query, - updated_multi_line_query) + "CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n" + "Luke_Father = 'Darth Vader'", + q.as_create_table("tmp") + ) class CeleryTestCase(SupersetTestCase):