Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ejf/improved spellcheck 4319 4320 #4330

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ Release date: Undefined

* Don't show ``DuplicateBasesError`` for attribute access

* Allow code flanked in backticks to be skipped by the spellchecker

Closes #4319

* Allow Python tool directives (for black, flake8, zimports, isort, mypy, bandit, pycharm) at the beginning of comments to be skipped by the spellchecker

Closes #4320


What's New in Pylint 2.7.4?
===========================
Expand Down Expand Up @@ -298,7 +306,7 @@ Release date: 2021-02-21

Close #2738

* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, bracket, comman, etc.)
* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, bracket, comma, etc.)

* Improve lint message for ``singleton-comparison`` with bools

Expand Down
74 changes: 62 additions & 12 deletions pylint/checkers/spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# Copyright (c) 2020 Ganden Schaffner <[email protected]>
# Copyright (c) 2020 hippo91 <[email protected]>
# Copyright (c) 2020 Damien Baty <[email protected]>
# Copyright (c) 2021 Eli Fine <[email protected]>
Pierre-Sassoulas marked this conversation as resolved.
Show resolved Hide resolved

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
Expand All @@ -26,6 +27,7 @@
import os
import re
import tokenize
from typing import Pattern

from pylint.checkers import BaseTokenChecker
from pylint.checkers.utils import check_messages
Expand Down Expand Up @@ -79,7 +81,7 @@ def get_tokenizer(
instr = " To make it work, install the 'python-enchant' package."


class WordsWithDigigtsFilter(Filter):
class WordsWithDigitsFilter(Filter):
Pierre-Sassoulas marked this conversation as resolved.
Show resolved Hide resolved
"""Skips words with digits."""

def _skip(self, word):
Expand All @@ -99,7 +101,21 @@ def _skip(self, word):
return "_" in word


class CamelCasedWord(Filter):
class RegExFilter(Filter):
    """Base class for filters driven by a regular expression.

    Subclasses assign a compiled expression to the class attribute
    ``_pattern``. Any word that the expression matches (anchored at the
    start of the word, as ``re.match`` does) is skipped.
    """

    # Compiled expression supplied by each concrete subclass.
    _pattern: Pattern[str]

    def _skip(self, word) -> bool:
        """Return True when ``word`` matches ``_pattern``."""
        matched = self._pattern.match(word)
        return matched is not None


class CamelCasedWord(RegExFilter):
r"""Filter skipping over camelCasedWords.
This filter skips any words matching the following regular expression:

Expand All @@ -109,11 +125,8 @@ class CamelCasedWord(Filter):
"""
_pattern = re.compile(r"^([a-z]+([\d]|[A-Z])(?:\w+)?)")

def _skip(self, word):
return bool(self._pattern.match(word))


class SphinxDirectives(Filter):
class SphinxDirectives(RegExFilter):
r"""Filter skipping over Sphinx Directives.
This filter skips any words matching the following regular expression:

Expand All @@ -124,11 +137,8 @@ class SphinxDirectives(Filter):
# The final ` in the pattern is optional because enchant strips it out
_pattern = re.compile(r"^(:([a-z]+)){1,2}:`([^`]+)(`)?")

def _skip(self, word):
return bool(self._pattern.match(word))


class ForwardSlashChunkder(Chunker):
class ForwardSlashChunker(Chunker):
"""
This chunker allows splitting words like 'before/after' into 'before' and 'after'
"""
Expand Down Expand Up @@ -169,6 +179,23 @@ def _next(self):
raise StopIteration()


CODE_FLANKED_IN_BACKTICK_REGEX = re.compile(r"(\s|^)(`{1,2})([^`]+)(\2)([^`]|$)")


def _strip_code_flanked_in_backticks(line: str) -> str:
"""Alter line so code flanked in backticks is ignored.

Pyenchant automatically strips backticks when parsing tokens,
so this cannot be done at the individual filter level."""

def replace_code_but_leave_surrounding_characters(match_obj) -> str:
return match_obj.group(1) + match_obj.group(5)

return CODE_FLANKED_IN_BACKTICK_REGEX.sub(
replace_code_but_leave_surrounding_characters, line
)


class SpellingChecker(BaseTokenChecker):
"""Check spelling in comments and docstrings"""

Expand Down Expand Up @@ -245,6 +272,15 @@ class SpellingChecker(BaseTokenChecker):
"help": "Limits count of emitted suggestions for spelling mistakes.",
},
),
(
"spelling-ignore-comment-directives",
{
"default": "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:",
"type": "string",
"metavar": "<comma separated words>",
"help": "List of comma separated words that should be considered directives if they appear and the beginning of a comment and should not be checked.",
},
),
)

def open(self):
Expand All @@ -264,6 +300,10 @@ def open(self):
# "pylint" appears in comments in pylint pragmas.
self.ignore_list.extend(["param", "pylint"])

self.ignore_comment_directive_list = [
w.strip() for w in self.config.spelling_ignore_comment_directives.split(",")
]

# Expand tilde to allow e.g. spelling-private-dict-file = ~/.pylintdict
if self.config.spelling_private_dict_file:
self.config.spelling_private_dict_file = os.path.expanduser(
Expand All @@ -283,12 +323,12 @@ def open(self):

self.tokenizer = get_tokenizer(
dict_name,
chunkers=[ForwardSlashChunkder],
chunkers=[ForwardSlashChunker],
filters=[
EmailFilter,
URLFilter,
WikiWordFilter,
WordsWithDigigtsFilter,
WordsWithDigitsFilter,
WordsWithUnderscores,
CamelCasedWord,
SphinxDirectives,
Expand All @@ -308,9 +348,19 @@ def _check_spelling(self, msgid, line, line_num):
initial_space = 0
if line.strip().startswith("#") and "docstring" not in msgid:
line = line.strip()[1:]
# A ``Filter`` cannot determine if the directive is at the beginning of a line,
# nor determine if a colon is present or not (``pyenchant`` strips trailing colons).
# So implementing this here.
for iter_directive in self.ignore_comment_directive_list:
if line.startswith(" " + iter_directive):
line = line[(len(iter_directive) + 1) :]
break
starts_with_comment = True
else:
starts_with_comment = False

line = _strip_code_flanked_in_backticks(line)

for word, word_start_at in self.tokenizer(line.strip()):
word_start_at += initial_space
lower_cased_word = word.casefold()
Expand Down
10 changes: 7 additions & 3 deletions pylint/testutils/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@


def set_config(**kwargs):
"""Decorator for setting config values on a checker."""
"""Decorator for setting config values on a checker.

Passing the args and kwargs back to the test function itself
allows this decorator to be used on parametrized test cases.
"""

def _wrapper(fun):
@functools.wraps(fun)
def _forward(self):
def _forward(self, *args, **test_function_kwargs):
for key, value in kwargs.items():
setattr(self.checker.config, key, value)
if isinstance(self, CheckerTestCase):
# reopen checker in case, it may be interested in configuration change
self.checker.open()
fun(self)
fun(self, *args, **test_function_kwargs)

return _forward

Expand Down
105 changes: 104 additions & 1 deletion tests/checkers/unittest_spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
pass


class TestSpellingChecker(CheckerTestCase):
class TestSpellingChecker(CheckerTestCase): # pylint:disable=too-many-public-methods
# This is a test case class, not sure why it would be relevant to have
# this pylint rule enforced for test case classes.
CHECKER_CLASS = spelling.SpellingChecker

skip_on_missing_package_or_dict = pytest.mark.skipif(
Expand Down Expand Up @@ -303,6 +305,107 @@ def test_skip_sphinx_directives_2(self):
):
self.checker.visit_classdef(stmt)

@skip_on_missing_package_or_dict
@set_config(spelling_dict=spell_dict)
@pytest.mark.parametrize(
    "misspelled_portion_of_directive,second_portion_of_directive,description",
    (
        ("fmt", ": on", "black directive to turn on formatting"),
        ("fmt", ": off", "black directive to turn off formatting"),
        ("noqa", "", "pycharm directive"),
        ("noqa", ":", "flake8 / zimports directive"),
        ("nosec", "", "bandit directive"),
        ("isort", ":skip", "isort directive"),
        ("mypy", ":", "mypy directive"),
    ),
)
def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment(  # pylint:disable=unused-argument
    self,
    misspelled_portion_of_directive,
    second_portion_of_directive,
    description,
):
    """A leading tool directive is ignored; the same word later is reported.

    ``description`` is deliberately unused: having it as a parameter makes
    it show up in the pytest output as part of the parametrized test name.
    """
    directive = misspelled_portion_of_directive + second_portion_of_directive
    # The directive opens the comment, then its misspelled word is repeated
    # once more; only that repetition is expected to be flagged.
    full_comment = f"# {directive} {misspelled_portion_of_directive}"
    underline = " " + "^" * len(misspelled_portion_of_directive)
    expected_message = Message(
        "wrong-spelling-in-comment",
        line=1,
        args=(
            misspelled_portion_of_directive,
            full_comment,
            underline,
            self._get_msg_suggestions(misspelled_portion_of_directive),
        ),
    )
    with self.assertAddsMessages(expected_message):
        self.checker.process_tokens(_tokenize_str(full_comment))

@skip_on_missing_package_or_dict
@set_config(spelling_dict=spell_dict)
def test_skip_code_flanked_in_double_backticks(self):
    """Code inside double backticks is skipped; the bare repeat is reported."""
    misspelled = "qsize"
    full_comment = "# The function ``.qsize()`` .qsize()"
    # Exactly one message is expected, for the occurrence outside backticks.
    expected_message = Message(
        "wrong-spelling-in-comment",
        line=1,
        args=(
            misspelled,
            full_comment,
            " ^^^^^",
            self._get_msg_suggestions(misspelled),
        ),
    )
    with self.assertAddsMessages(expected_message):
        self.checker.process_tokens(_tokenize_str(full_comment))

@skip_on_missing_package_or_dict
@set_config(spelling_dict=spell_dict)
def test_skip_code_flanked_in_single_backticks(self):
    """Code inside single backticks is skipped; the bare repeat is reported."""
    misspelled = "qsize"
    full_comment = "# The function `.qsize()` .qsize()"
    # Exactly one message is expected, for the occurrence outside backticks.
    expected_message = Message(
        "wrong-spelling-in-comment",
        line=1,
        args=(
            misspelled,
            full_comment,
            " ^^^^^",
            self._get_msg_suggestions(misspelled),
        ),
    )
    with self.assertAddsMessages(expected_message):
        self.checker.process_tokens(_tokenize_str(full_comment))

@skip_on_missing_package_or_dict
@set_config(
    spelling_dict=spell_dict,
    spelling_ignore_comment_directives="newdirective:,noqa",
)
def test_skip_directives_specified_in_pylintrc(self):
    """A user-configured directive is skipped only at the start of a comment."""
    misspelled = "newdirective"
    full_comment = "# newdirective: do this newdirective"
    # The leading "newdirective:" comes from the configured directive list;
    # the trailing occurrence is the one expected to be reported.
    expected_message = Message(
        "wrong-spelling-in-comment",
        line=1,
        args=(
            misspelled,
            full_comment,
            " ^^^^^^^^^^^^",
            self._get_msg_suggestions(misspelled),
        ),
    )
    with self.assertAddsMessages(expected_message):
        self.checker.process_tokens(_tokenize_str(full_comment))

@skip_on_missing_package_or_dict
@set_config(spelling_dict=spell_dict)
def test_handle_words_joined_by_forward_slash(self):
Expand Down