From b9a64d6cac28c43f8360a59a3c2e3e7b4376fa3b Mon Sep 17 00:00:00 2001 From: Kyungho Byoun Date: Sat, 11 Nov 2023 19:39:32 +0900 Subject: [PATCH] Fix #377 #390 #394 (#440) * fix #394 and add test * fix #390 and add test * fix #377 and add test * fix 'NoneType' object has no attribute 'normalized' * formatted by black * remove unnecessaries --------- Co-authored-by: Maciej Brencz --- sql_metadata/generalizator.py | 3 ++- sql_metadata/parser.py | 12 ++++++---- sql_metadata/token.py | 9 ++++++++ test/test_comments.py | 8 +++++++ test/test_getting_columns.py | 42 +++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 5 deletions(-) diff --git a/sql_metadata/generalizator.py b/sql_metadata/generalizator.py index 5cdb0937..8f0678c7 100644 --- a/sql_metadata/generalizator.py +++ b/sql_metadata/generalizator.py @@ -47,7 +47,8 @@ def without_comments(self) -> str: :rtype: str """ sql = sqlparse.format(self._raw_query, strip_comments=True) - sql = re.sub(r"\s{2,}", " ", sql) + sql = sql.replace("\n", " ") + sql = re.sub(r"[ \t]+", " ", sql) return sql @property diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 96371100..a3d3a289 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -533,12 +533,16 @@ def subqueries(self) -> Dict: ): current_subquery.append(inner_token) inner_token = inner_token.next_token + + query_name = None if inner_token.next_token.value in self.subqueries_names: query_name = inner_token.next_token.value - else: + elif inner_token.next_token.is_as_keyword: query_name = inner_token.next_token.next_token.value + subquery_text = "".join([x.stringified_token for x in current_subquery]) - subqueries[query_name] = subquery_text + if query_name is not None: + subqueries[query_name] = subquery_text token = token.next_token @@ -622,7 +626,7 @@ def without_comments(self) -> str: """ Removes comments from SQL query """ - return Generalizator(self.query).without_comments + return Generalizator(self._raw_query).without_comments @property def generalize(self) -> str: @@ -865,7 +869,7 @@ def _determine_opening_parenthesis_type(self, token: SQLToken): # we are in columns and in a column subquery definition token.is_column_definition_start = True elif ( - token.previous_token.is_as_keyword + token.previous_token_not_comment.is_as_keyword and token.last_keyword_normalized != "WINDOW" ): # window clause also contains AS keyword, but it is not a query diff --git a/sql_metadata/token.py b/sql_metadata/token.py index 8a556eab..694d93ce 100644 --- a/sql_metadata/token.py +++ b/sql_metadata/token.py @@ -369,6 +369,15 @@ def next_token_not_comment(self): return self.next_token.next_token_not_comment return self.next_token + @property + def previous_token_not_comment(self): + """ + Property returning previous non-comment token + """ + if self.previous_token and self.previous_token.is_comment: + return self.previous_token.previous_token_not_comment + return self.previous_token + def is_constraint_definition_inside_create_table_clause( self, query_type: str ) -> bool: diff --git a/test/test_comments.py b/test/test_comments.py index b41380f8..9a93bb5a 100644 --- a/test/test_comments.py +++ b/test/test_comments.py @@ -205,3 +205,11 @@ def test_next_token_not_comment_on_non_comments(): select_tok.next_token.next_token == select_tok.next_token_not_comment.next_token_not_comment ) + + +def test_without_comments_for_multiline_query(): + query = """SELECT * -- comment + FROM table + WHERE table.id = '123'""" + parser = Parser(query) + assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" diff --git a/test/test_getting_columns.py b/test/test_getting_columns.py index 09a02197..8f6c20b5 100644 --- a/test/test_getting_columns.py +++ b/test/test_getting_columns.py @@ -264,6 +264,14 @@ def test_columns_with_comments(): "order_by": ["cl_sortkey"], } + parser = Parser( + """WITH aa AS --sdfsdfsdf + (SELECT C1, C2 FROM T1) + SELECT C1, C2 FROM aa""" + ) + assert parser.columns == ["C1", "C2"] + assert parser.columns_dict == {"select": ["C1", "C2"]} + def test_columns_with_keyword_aliases(): parser = Parser( @@ -477,3 +485,37 @@ def test_having_columns(): "group_by": ["Country"], "having": ["CustomerID"], } + + +def test_nested_queries(): + query = """ + SELECT max(dt) FROM + ( + SELECT max(dt) as dt FROM t + UNION ALL + SELECT max(dt) as dt FROM t2 + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]} + + query = """ + SELECT max(dt) FROM + ( + SELECT max(dt) as dt FROM t + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]} + + query = """ + SELECT max(dt) FROM + ( + SELECT dt FROM t + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]}