From c457abd5f097dd13fb21543381e7cfafe7d31cfb Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 20 Mar 2023 08:33:46 +0100 Subject: [PATCH] Remove unnecessary parts in regex for bad escaping. The regex tried to deal with situations where escaping in the SQL to be parsed was suspicious. --- CHANGELOG | 10 ++++++++++ sqlparse/keywords.py | 4 ++-- tests/test_split.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 94864138..880a9ca9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,22 @@ Development Version ------------------- +Notable Changes + +* IMPORTANT: This release fixes a security vulnerability in the + parser where a regular expression vulnerable to ReDOS (Regular + Expression Denial of Service) was used. See the security advisory + for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2 + The vulnerability was discovered by @erik-krogh from GitHub + Security Lab (GHSL). Thanks for reporting! + Bug Fixes * Revert a change from 0.4.0 that changed IN to be a comparison (issue694). The primary expectation is that IN is treated as a keyword and not as a comparison operator. That also follows the definition of reserved keywords for the major SQL syntax definitions. +* Fix regular expressions for string parsing. Other diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f85d4688..b45f3e0f 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -59,9 +59,9 @@ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + (r"'(''|\\'|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), + (r'"(""|\\"|[^"])*"', tokens.String.Symbol), (r'(""|".*?[^\\]")', tokens.String.Symbol), # sqlite names can be escaped with [square brackets]. left bracket # cannot be preceded by word character or a right bracket -- diff --git a/tests/test_split.py b/tests/test_split.py index a9d75765..e79750e8 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -18,8 +18,8 @@ def test_split_semicolon(): def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql',