Skip to content

Commit

Permalink
Ignore quantifiers when splitting comma-separated regexes (#8898) (#8901
Browse files Browse the repository at this point in the history
)

Do not split on commas if they are between braces, since that indicates
a quantifier. Also added a protection for slow implementations since
existing workarounds may result in long strings of chained regular
expressions.

Adjust existing test for invalid regex to be truly invalid

Co-authored-by: lihu <[email protected]>
(cherry picked from commit d28def9)
  • Loading branch information
jacobtylerwalls authored Jul 30, 2023
1 parent fe161df commit c72a149
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 4 deletions.
5 changes: 5 additions & 0 deletions doc/whatsnew/fragments/7229.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
When parsing comma-separated lists of regular expressions in the config, ignore
commas that are inside braces since those indicate quantifiers, not delineation
between expressions.

Closes #7229
2 changes: 1 addition & 1 deletion pylint/config/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _regex_transformer(value: str) -> Pattern[str]:
def _regexp_csv_transfomer(value: str) -> Sequence[Pattern[str]]:
"""Transforms a comma separated list of regular expressions."""
patterns: list[Pattern[str]] = []
for pattern in _csv_transformer(value):
for pattern in pylint_utils._check_regexp_csv(value):
patterns.append(_regex_transformer(pattern))
return patterns

Expand Down
2 changes: 2 additions & 0 deletions pylint/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
HAS_ISORT_5,
IsortDriver,
_check_csv,
_check_regexp_csv,
_format_option_value,
_splitstrip,
_unquote,
Expand All @@ -34,6 +35,7 @@
"HAS_ISORT_5",
"IsortDriver",
"_check_csv",
"_check_regexp_csv",
"_format_option_value",
"_splitstrip",
"_unquote",
Expand Down
28 changes: 27 additions & 1 deletion pylint/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import textwrap
import tokenize
import warnings
from collections.abc import Sequence
from collections import deque
from collections.abc import Iterable, Sequence
from io import BufferedReader, BytesIO
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -330,6 +331,31 @@ def _check_csv(value: list[str] | tuple[str] | str) -> Sequence[str]:
return _splitstrip(value)


def _check_regexp_csv(value: list[str] | tuple[str] | str) -> Iterable[str]:
r"""Split a comma-separated list of regexps, taking care to avoid splitting
a regex employing a comma as quantifier, as in `\d{1,2}`."""
if isinstance(value, (list, tuple)):
yield from value
else:
# None is a sentinel value here
regexps: deque[deque[str] | None] = deque([None])
open_braces = False
for char in value:
if char == "{":
open_braces = True
elif char == "}" and open_braces:
open_braces = False

if char == "," and not open_braces:
regexps.append(None)
elif regexps[-1] is None:
regexps.pop()
regexps.append(deque([char]))
else:
regexps[-1].append(char)
yield from ("".join(regexp).strip() for regexp in regexps if regexp is not None)


def _comment(string: str) -> str:
"""Return string as a comment."""
lines = [line.strip() for line in string.splitlines()]
Expand Down
31 changes: 29 additions & 2 deletions tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from __future__ import annotations

import os
import re
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any

import pytest
from pytest import CaptureFixture
Expand Down Expand Up @@ -113,6 +115,31 @@ def test_unknown_py_version(capsys: CaptureFixture) -> None:
assert "the-newest has an invalid format, should be a version string." in output.err


CSV_REGEX_COMMA_CASES = [
("foo", ["foo"]),
("foo,bar", ["foo", "bar"]),
("foo, bar", ["foo", "bar"]),
("foo, bar{1,3}", ["foo", "bar{1,3}"]),
]


@pytest.mark.parametrize("in_string,expected", CSV_REGEX_COMMA_CASES)
def test_csv_regex_comma_in_quantifier(in_string: str, expected: list[str]) -> None:
"""Check that we correctly parse a comma-separated regex when there are one
or more commas within quantifier expressions.
"""

def _template_run(in_string: str) -> list[re.Pattern[Any]]:
r = Run(
[str(EMPTY_MODULE), rf"--bad-names-rgx={in_string}"],
exit=False,
)
bad_names_rgxs: list[re.Pattern[Any]] = r.linter.config.bad_names_rgxs
return bad_names_rgxs

assert _template_run(in_string) == [re.compile(regex) for regex in expected]


def test_regex_error(capsys: CaptureFixture) -> None:
"""Check that we correctly error when an an option is passed whose value is an invalid regular expression."""
with pytest.raises(SystemExit):
Expand All @@ -135,12 +162,12 @@ def test_csv_regex_error(capsys: CaptureFixture) -> None:
"""
with pytest.raises(SystemExit):
Run(
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,3})"],
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,}, foo{1,3}})"],
exit=False,
)
output = capsys.readouterr()
assert (
r"Error in provided regular expression: (foo{1 beginning at index 0: missing ), unterminated subpattern"
r"Error in provided regular expression: (foo{1,} beginning at index 0: missing ), unterminated subpattern"
in output.err
)

Expand Down

0 comments on commit c72a149

Please sign in to comment.