Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore quantifiers when splitting comma-separated regexes #8898

Merged
merged 12 commits into from
Jul 30, 2023
2 changes: 1 addition & 1 deletion doc/data/messages/i/invalid-name/details.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Before pylint 3.0, most predefined patterns also enforced a minimum length
of three characters. If this behavior is desired in versions 3.0 and following,
it can be had by providing custom regular expressions as described next. (Or,
if the ``disallowed-name`` check is sufficient instead of ``invalid-name``,
providing the single option ``bad-names-rgxs="^..?$"`` will suffice to fail 1-2
providing the single option ``bad-names-rgx="^..?$"`` will suffice to fail 1-2
character names.

Custom regular expressions
Expand Down
2 changes: 1 addition & 1 deletion doc/whatsnew/fragments/2018.user_action
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ and name length, and users regularly reported this to be surprising.)
If checking for a minimum length is still desired, it can be regained in two ways:

- If you are content with a ``disallowed-name`` message (instead of ``invalid-name``),
then simply add the option ``bad-names-rgxs="^..?$"``, which will fail 1-2
then simply add the option ``bad-names-rgx="^..?$"``, which will fail 1-2
character-long names. (Ensure you enable ``disallowed-name``.)

- If you would prefer an ``invalid-name`` message to be emitted, or would prefer
Expand Down
5 changes: 5 additions & 0 deletions doc/whatsnew/fragments/7229.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
When parsing comma-separated lists of regular expressions in the config, ignore
commas that are inside braces since those indicate quantifiers, not delineation
between expressions.

Closes #7229
2 changes: 1 addition & 1 deletion pylint/config/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _regex_transformer(value: str) -> Pattern[str]:
def _regexp_csv_transfomer(value: str) -> Sequence[Pattern[str]]:
"""Transforms a comma separated list of regular expressions."""
patterns: list[Pattern[str]] = []
for pattern in _csv_transformer(value):
for pattern in pylint_utils._check_regexp_csv(value):
patterns.append(_regex_transformer(pattern))
return patterns

Expand Down
2 changes: 2 additions & 0 deletions pylint/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
HAS_ISORT_5,
IsortDriver,
_check_csv,
_check_regexp_csv,
_splitstrip,
_unquote,
decoding_stream,
Expand All @@ -32,6 +33,7 @@
"HAS_ISORT_5",
"IsortDriver",
"_check_csv",
"_check_regexp_csv",
"_splitstrip",
"_unquote",
"decoding_stream",
Expand Down
26 changes: 25 additions & 1 deletion pylint/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import textwrap
import tokenize
import warnings
from collections.abc import Sequence
from collections import deque
from collections.abc import Iterable, Sequence
from io import BufferedReader, BytesIO
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -253,6 +254,29 @@ def _check_csv(value: list[str] | tuple[str] | str) -> Sequence[str]:
return _splitstrip(value)


def _check_regexp_csv(value: list[str] | tuple[str] | str) -> Iterable[str]:
jacobtylerwalls marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(value, (list, tuple)):
yield from value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Codecov message looks important since that line yields a value and apparently isn’t covered?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No current code path sends a list of strings to this util, or the one above it (also missing coverage for the analogous line). Should we just delete these lines? Not certain--I think it makes sense to let this util be this permissive. Also helps guard against unexpected input. I'm inclined to leave as is. WDYT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think taking the time to try to understand is not worth it vs just not touching it.

else:
# None is a sentinel value here
regexps: deque[deque[str] | None] = deque([None])
open_braces = False
for char in value:
if char == "{":
open_braces = True
elif char == "}" and open_braces:
open_braces = False

if char == "," and not open_braces:
regexps.append(None)
elif regexps[-1] is None:
regexps.pop()
regexps.append(deque([char]))
else:
regexps[-1].append(char)
yield from ("".join(regexp).strip() for regexp in regexps if regexp is not None)


def _comment(string: str) -> str:
"""Return string as a comment."""
lines = [line.strip() for line in string.splitlines()]
Expand Down
31 changes: 29 additions & 2 deletions tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from __future__ import annotations

import os
import re
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any

import pytest
from pytest import CaptureFixture
Expand Down Expand Up @@ -115,6 +117,31 @@ def test_unknown_py_version(capsys: CaptureFixture) -> None:
assert "the-newest has an invalid format, should be a version string." in output.err


CSV_REGEX_COMMA_CASES = [
("foo", ["foo"]),
("foo,bar", ["foo", "bar"]),
("foo, bar", ["foo", "bar"]),
("foo, bar{1,3}", ["foo", "bar{1,3}"]),
]


@pytest.mark.parametrize("in_string,expected", CSV_REGEX_COMMA_CASES)
def test_csv_regex_comma_in_quantifier(in_string: str, expected: list[str]) -> None:
"""Check that we correctly parse a comma-separated regex when there are one
or more commas within quantifier expressions.
"""

def _template_run(in_string: str) -> list[re.Pattern[Any]]:
r = Run(
[str(EMPTY_MODULE), rf"--bad-names-rgx={in_string}"],
exit=False,
)
bad_names_rgxs: list[re.Pattern[Any]] = r.linter.config.bad_names_rgxs
return bad_names_rgxs

assert _template_run(in_string) == [re.compile(regex) for regex in expected]


def test_regex_error(capsys: CaptureFixture) -> None:
"""Check that we correctly error when an an option is passed whose value is an invalid regular expression."""
with pytest.raises(SystemExit):
Expand All @@ -137,12 +164,12 @@ def test_csv_regex_error(capsys: CaptureFixture) -> None:
"""
with pytest.raises(SystemExit):
Run(
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,3})"],
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,}, foo{1,3}})"],
exit=False,
)
output = capsys.readouterr()
assert (
r"Error in provided regular expression: (foo{1 beginning at index 0: missing ), unterminated subpattern"
r"Error in provided regular expression: (foo{1,} beginning at index 0: missing ), unterminated subpattern"
in output.err
)

Expand Down