From 6720b4099feeec12e9a536c9c52f36a02f0890d7 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 19 Dec 2016 13:58:11 -0800 Subject: [PATCH 1/2] [sqllab] bugfix visualizing a query with a semi-colon --- superset/sql_lab.py | 31 +++++-------------------------- superset/sql_parse.py | 32 +++++++++++++++++++++++++++++++- superset/utils.py | 3 +++ superset/views.py | 4 +++- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 1bb1d0b73404e..216905aca9dfa 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -11,7 +11,8 @@ from sqlalchemy.orm import sessionmaker from superset import ( - app, db, models, utils, dataframe, results_backend, sql_parse) + app, db, models, utils, dataframe, results_backend) +from superset.sql_parse import SupersetQuery from superset.db_engine_specs import LimitMethod from superset.jinja_context import get_template_processor QueryStatus = models.QueryStatus @@ -40,28 +41,6 @@ def dedup(l, suffix='__'): return new_l -def create_table_as(sql, table_name, override=False): - """Reformats the query into the create table as query. - - Works only for the single select SQL statements, in all other cases - the sql query is not modified. - :param superset_query: string, sql query that will be executed - :param table_name: string, will contain the results of the query execution - :param override, boolean, table table_name will be dropped if true - :return: string, create table as query - """ - # TODO(bkyryliuk): enforce that all the columns have names. Presto requires it - # for the CTA operation. - # TODO(bkyryliuk): drop table if allowed, check the namespace and - # the permissions. - # TODO raise if multi-statement - exec_sql = '' - if override: - exec_sql = 'DROP TABLE IF EXISTS {table_name};\n' - exec_sql += "CREATE TABLE {table_name} AS \n{sql}" - return exec_sql.format(**locals()) - - @celery_app.task(bind=True) def get_sql_results(self, query_id, return_results=True, store_results=False): """Executes the sql query returns the results.""" @@ -76,7 +55,6 @@ def get_sql_results(self, query_id, return_results=True, store_results=False): session.commit() # HACK query = session.query(models.Query).filter_by(id=query_id).one() database = query.database - executed_sql = query.sql.strip().strip(';') db_engine_spec = database.db_engine_spec def handle_error(msg): @@ -88,7 +66,8 @@ def handle_error(msg): raise Exception(query.error_message) # Limit enforced only for retrieving the data, not for the CTA queries. - superset_query = sql_parse.SupersetQuery(executed_sql) + superset_query = SupersetQuery(query.sql) + executed_sql = superset_query.stripped() if not superset_query.is_select() and not database.allow_dml: handle_error( "Only `SELECT` statements are allowed against this database") @@ -102,7 +81,7 @@ def handle_error(msg): query.tmp_table_name = 'tmp_{}_table_{}'.format( query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S')) - executed_sql = create_table_as( + executed_sql = superset_query.as_create_table( executed_sql, query.tmp_table_name) query.select_as_cta_used = True elif ( diff --git a/superset/sql_parse.py b/superset/sql_parse.py index 8f2c6e018b8e1..5b6d86990d7b7 100644 --- a/superset/sql_parse.py +++ b/superset/sql_parse.py @@ -14,7 +14,8 @@ def __init__(self, sql_statement): self._table_names = set() self._alias_names = set() # TODO: multistatement support - for statement in sqlparse.parse(self.sql): + self._parsed = sqlparse.parse(self.sql) + for statement in self._parsed: self.__extract_from_token(statement) self._table_names = self._table_names - self._alias_names @@ -26,6 +27,13 @@ def tables(self): def is_select(self): return self.sql.upper().startswith('SELECT') + def stripped(self): + sql = self.sql + if sql: + while sql[-1] in (' ', ';', '\n', '\t'): + sql = sql[:-1] + return sql + @staticmethod def __precedes_table_name(token_value): for keyword in PRECEDES_TABLE_NAME: @@ -67,6 +75,28 @@ def __process_identifier(self, identifier): self._alias_names.add(identifier.tokens[0].value) self.__extract_from_token(identifier) + def as_create_table(self, table_name, override=False): + """Reformats the query into the create table as query. + + Works only for the single select SQL statements, in all other cases + the sql query is not modified. + :param superset_query: string, sql query that will be executed + :param table_name: string, will contain the results of the query execution + :param override, boolean, table table_name will be dropped if true + :return: string, create table as query + """ + # TODO(bkyryliuk): enforce that all the columns have names. Presto requires it + # for the CTA operation. + # TODO(bkyryliuk): drop table if allowed, check the namespace and + # the permissions. + # TODO raise if multi-statement + exec_sql = '' + sql = self.stripped() + if override: + exec_sql = 'DROP TABLE IF EXISTS {table_name};\n' + exec_sql += "CREATE TABLE {table_name} AS \n{sql}" + return exec_sql.format(**locals()) + def __extract_from_token(self, token): if not hasattr(token, 'tokens'): return diff --git a/superset/utils.py b/superset/utils.py index 9239b71b581ae..571435139118a 100644 --- a/superset/utils.py +++ b/superset/utils.py @@ -401,3 +401,6 @@ def ping_connection(dbapi_connection, connection_record, connection_proxy): except: raise exc.DisconnectionError() cursor.close() + +def strip_sql(sql): + return sql diff --git a/superset/views.py b/superset/views.py index 34709a1720dc8..3f4dd66310fc2 100755 --- a/superset/views.py +++ b/superset/views.py @@ -40,6 +40,7 @@ ) from superset.source_registry import SourceRegistry from superset.models import DatasourceAccessRequest as DAR +from superset.sql_parse import SupersetQuery config = app.config log_this = models.Log.log_this @@ -2220,7 +2221,8 @@ def sqllab_viz(self): if not table: table = models.SqlaTable(table_name=table_name) table.database_id = data.get('dbId') - table.sql = data.get('sql') + q = SupersetQuery(data.get('sql')) + table.sql = q.stripped() db.session.add(table) cols = [] dims = [] From 86066aa6486841994fa601ad738d3f107445f922 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 19 Dec 2016 20:16:57 -0800 Subject: [PATCH 2/2] Fixing tests --- superset/sql_lab.py | 3 +-- superset/sql_parse.py | 13 +++++++------ superset/utils.py | 3 --- tests/celery_tests.py | 40 ++++++++++++++++++---------------------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 216905aca9dfa..d9c33a2b2990d 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -81,8 +81,7 @@ def handle_error(msg): query.tmp_table_name = 'tmp_{}_table_{}'.format( query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S')) - executed_sql = superset_query.as_create_table( - executed_sql, query.tmp_table_name) + executed_sql = superset_query.as_create_table(query.tmp_table_name) query.select_as_cta_used = True elif ( query.limit and superset_query.is_select() and diff --git a/superset/sql_parse.py b/superset/sql_parse.py index 5b6d86990d7b7..0b28f23e6e468 100644 --- a/superset/sql_parse.py +++ b/superset/sql_parse.py @@ -75,24 +75,25 @@ def __process_identifier(self, identifier): self._alias_names.add(identifier.tokens[0].value) self.__extract_from_token(identifier) - def as_create_table(self, table_name, override=False): + def as_create_table(self, table_name, overwrite=False): """Reformats the query into the create table as query. Works only for the single select SQL statements, in all other cases the sql query is not modified. :param superset_query: string, sql query that will be executed - :param table_name: string, will contain the results of the query execution - :param override, boolean, table table_name will be dropped if true + :param table_name: string, will contain the results of the + query execution + :param overwrite, boolean, table table_name will be dropped if true :return: string, create table as query """ - # TODO(bkyryliuk): enforce that all the columns have names. Presto requires it - # for the CTA operation. + # TODO(bkyryliuk): enforce that all the columns have names. + # Presto requires it for the CTA operation. # TODO(bkyryliuk): drop table if allowed, check the namespace and # the permissions. # TODO raise if multi-statement exec_sql = '' sql = self.stripped() - if override: + if overwrite: exec_sql = 'DROP TABLE IF EXISTS {table_name};\n' exec_sql += "CREATE TABLE {table_name} AS \n{sql}" return exec_sql.format(**locals()) diff --git a/superset/utils.py b/superset/utils.py index 571435139118a..9239b71b581ae 100644 --- a/superset/utils.py +++ b/superset/utils.py @@ -401,6 +401,3 @@ def ping_connection(dbapi_connection, connection_record, connection_proxy): except: raise exc.DisconnectionError() cursor.close() - -def strip_sql(sql): - return sql diff --git a/tests/celery_tests.py b/tests/celery_tests.py index af58afef6645e..f172e6b1fe544 100644 --- a/tests/celery_tests.py +++ b/tests/celery_tests.py @@ -12,8 +12,9 @@ import pandas as pd -from superset import app, appbuilder, cli, db, models, sql_lab, dataframe +from superset import app, appbuilder, cli, db, models, dataframe from superset.security import sync_role_definitions +from superset.sql_parse import SupersetQuery from .base_tests import SupersetTestCase @@ -35,38 +36,33 @@ class UtilityFunctionTests(SupersetTestCase): # TODO(bkyryliuk): support more cases in CTA function. def test_create_table_as(self): - select_query = "SELECT * FROM outer_space;" - updated_select_query = sql_lab.create_table_as( - select_query, "tmp") + q = SupersetQuery("SELECT * FROM outer_space;") + self.assertEqual( - "CREATE TABLE tmp AS \nSELECT * FROM outer_space;", - updated_select_query) + "CREATE TABLE tmp AS \nSELECT * FROM outer_space", + q.as_create_table("tmp")) - updated_select_query_with_drop = sql_lab.create_table_as( - select_query, "tmp", override=True) self.assertEqual( "DROP TABLE IF EXISTS tmp;\n" - "CREATE TABLE tmp AS \nSELECT * FROM outer_space;", - updated_select_query_with_drop) + "CREATE TABLE tmp AS \nSELECT * FROM outer_space", + q.as_create_table("tmp", overwrite=True)) - select_query_no_semicolon = "SELECT * FROM outer_space" - updated_select_query_no_semicolon = sql_lab.create_table_as( - select_query_no_semicolon, "tmp") + # now without a semicolon + q = SupersetQuery("SELECT * FROM outer_space") self.assertEqual( "CREATE TABLE tmp AS \nSELECT * FROM outer_space", - updated_select_query_no_semicolon) + q.as_create_table("tmp")) + # now a multi-line query multi_line_query = ( "SELECT * FROM planets WHERE\n" - "Luke_Father = 'Darth Vader';") - updated_multi_line_query = sql_lab.create_table_as( - multi_line_query, "tmp") - expected_updated_multi_line_query = ( - "CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n" - "Luke_Father = 'Darth Vader';") + "Luke_Father = 'Darth Vader'") + q = SupersetQuery(multi_line_query) self.assertEqual( - expected_updated_multi_line_query, - updated_multi_line_query) + "CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n" + "Luke_Father = 'Darth Vader'", + q.as_create_table("tmp") + ) class CeleryTestCase(SupersetTestCase):