Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --ignore-multiline-regex option. #3476

Merged
merged 1 commit into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 58 additions & 4 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Pattern,
Sequence,
Set,
TextIO,
Tuple,
)

Expand Down Expand Up @@ -201,11 +202,17 @@ def __str__(self) -> str:


class FileOpener:
def __init__(self, use_chardet: bool, quiet_level: int) -> None:
def __init__(
self,
use_chardet: bool,
quiet_level: int,
ignore_multiline_regex: Optional[Pattern[str]],
) -> None:
self.use_chardet = use_chardet
if use_chardet:
self.init_chardet()
self.quiet_level = quiet_level
self.ignore_multiline_regex = ignore_multiline_regex

def init_chardet(self) -> None:
try:
Expand Down Expand Up @@ -247,7 +254,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
)
raise
else:
lines = f.readlines()
lines = self.get_lines(f)
f.close()

return lines, f.encoding
Expand All @@ -262,7 +269,7 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
with open(filename, encoding=encoding, newline="") as f:
try:
lines = f.readlines()
lines = self.get_lines(f)
except UnicodeDecodeError:
if not self.quiet_level & QuietLevels.ENCODING:
print(
Expand All @@ -279,6 +286,22 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:

return lines, encoding

def get_lines(self, f: TextIO) -> List[str]:
    """Return the lines of *f*, blanking regions matched by the multiline regex.

    When ``self.ignore_multiline_regex`` is falsy, this is exactly
    ``f.readlines()``.  Otherwise the whole stream is read, every match of
    the regex is replaced by one ``"\\n"`` per ``"\\n"`` it contained (so
    that the line numbers of the remaining text are unchanged), and the
    result is split back into lines.

    Both branches return lines in the same shape: each element keeps its
    line terminator and no trailing empty element is appended.

    Args:
        f: Open text stream to read from; it is consumed to the end.

    Returns:
        The list of lines, line terminators included.
    """
    if not self.ignore_multiline_regex:
        return f.readlines()

    # Function-scope import keeps this block self-contained.
    import io

    text = f.read()
    pieces: List[str] = []
    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        pieces.append(text[pos : m.start()])
        # Replace with blank lines so line numbers are unchanged.
        pieces.append("\n" * m.group().count("\n"))
        pos = m.end()
    pieces.append(text[pos:])

    # Re-split with the same rules as a stream opened with newline="":
    # "\n", "\r" and "\r\n" end a line and are kept untranslated.  A plain
    # str.split("\n") would drop the terminators and add a trailing "",
    # making this branch disagree with the readlines() branch above.
    return io.StringIO("".join(pieces), newline="").readlines()


# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-

Expand Down Expand Up @@ -411,6 +434,19 @@ def parse_options(
'e.g., "\\bmatch\\b". Defaults to '
"empty/disabled.",
)
parser.add_argument(
"--ignore-multiline-regex",
action="store",
type=str,
help="regular expression that is used to ignore "
"text that may span multi-line regions. "
"The regex is run with re.DOTALL. For example to "
"allow skipping of regions of Python code using "
"begin/end comments one could use: "
"--ignore-multiline-regex "
"'# codespell:ignore-begin *\\n.*# codespell:ignore-end *\\n'. "
"Defaults to empty/disabled.",
)
parser.add_argument(
"-I",
"--ignore-words",
Expand Down Expand Up @@ -1115,6 +1151,20 @@ def main(*args: str) -> int:
else:
ignore_word_regex = None

if options.ignore_multiline_regex:
try:
ignore_multiline_regex = re.compile(
options.ignore_multiline_regex, re.DOTALL
)
except re.error as e:
return _usage_error(
parser,
f"ERROR: invalid --ignore-multiline-regex "
f'"{options.ignore_multiline_regex}" ({e})',
)
else:
ignore_multiline_regex = None

ignore_words, ignore_words_cased = parse_ignore_words_option(
options.ignore_words_list
)
Expand Down Expand Up @@ -1203,7 +1253,11 @@ def main(*args: str) -> int:
for exclude_file in exclude_files:
build_exclude_hashes(exclude_file, exclude_lines)

file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level)
file_opener = FileOpener(
options.hard_encoding_detection,
options.quiet_level,
ignore_multiline_regex,
)

glob_match = GlobMatch(
flatten_clean_comma_separated_arguments(options.skip) if options.skip else []
Expand Down
37 changes: 37 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,43 @@ def test_ignore_regex_option(
assert cs.main(fname, r"--ignore-regex=\bdonn\b") == 1


def test_ignore_multiline_regex_option(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Test --ignore-multiline-regex option functionality."""

    # Invalid regex: "(" does not compile, so main() must exit with a usage
    # error and print the usage text.
    result = cs.main("--ignore-multiline-regex=(", std=True)
    assert isinstance(result, tuple)
    code, stdout, _ = result
    assert code == EX_USAGE
    assert "usage:" in stdout

    # Fixture: "abandonned" appears four times, two of them between the
    # codespell:ignore-begin / codespell:ignore-end markers.
    fname = tmp_path / "flag.txt"
    fname.write_text(
        """
Please see http://example.com/abandonned for info
# codespell:ignore-begin
'''
abandonned
abandonned
'''
# codespell:ignore-end
abandonned
"""
    )
    # Baseline without the option: main() returns 4 for this file.
    assert cs.main(fname) == 4
    # With the begin/end region ignored, only 2 are expected to remain.
    assert (
        cs.main(
            fname,
            "--ignore-multiline-regex",
            "codespell:ignore-begin.*codespell:ignore-end",
        )
        == 2
    )


def test_uri_regex_option(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@ max-complexity = 45
[tool.ruff.lint.pylint]
allow-magic-value-types = ["bytes", "int", "str",]
max-args = 13
max-branches = 46
max-returns = 11
max-branches = 47
max-returns = 12
max-statements = 119
Loading