pylint-dev · Pierre-Sassoulas · Apr 10, 2021 · Apr 9, 2021 · Apr 9, 2021 · Apr 9, 2021
diff --git a/ChangeLog b/ChangeLog
@@ -63,6 +63,14 @@ Release date: Undefined
 
 * Don't show ``DuplicateBasesError`` for attribute access
 
+* Allow code flanked in backticks to be skipped by spellchecker
+
+  Closes #4319
+
+* Allow Python tool directives (for black, flake8, zimports, isort, mypy, bandit, pycharm) at the beginning of comments to be skipped by spellchecker
+
+  Closes #4320
+
 
 What's New in Pylint 2.7.4?
 ===========================
@@ -298,7 +306,7 @@ Release date: 2021-02-21
 
   Close #2738
 
-* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, bracket, comman, etc.)
+* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, bracket, comma, etc.)
 
 * Improve lint message for ``singleton-comparison`` with bools
 

diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py
@@ -17,6 +17,7 @@
 # Copyright (c) 2020 Ganden Schaffner <[email protected]>
 # Copyright (c) 2020 hippo91 <[email protected]>
 # Copyright (c) 2020 Damien Baty <[email protected]>
+# Copyright (c) 2021 Eli Fine <[email protected]>
 
 # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 # For details: https://github.com/PyCQA/pylint/blob/master/COPYING
@@ -26,6 +27,7 @@
 import os
 import re
 import tokenize
+from typing import Pattern
 
 from pylint.checkers import BaseTokenChecker
 from pylint.checkers.utils import check_messages
@@ -79,7 +81,7 @@ def get_tokenizer(
     instr = " To make it work, install the 'python-enchant' package."
 
 
-class WordsWithDigigtsFilter(Filter):
+class WordsWithDigitsFilter(Filter):
     """Skips words with digits."""
 
     def _skip(self, word):
@@ -99,7 +101,21 @@ def _skip(self, word):
         return "_" in word
 
 
-class CamelCasedWord(Filter):
+class RegExFilter(Filter):
+    """Parent class for filters using regular expressions.
+
+    This filter skips any words that match the expression
+    assigned to the class attribute ``_pattern``.
+
+    """
+
+    _pattern: Pattern[str]
+
+    def _skip(self, word) -> bool:
+        return bool(self._pattern.match(word))
+
+
+class CamelCasedWord(RegExFilter):
     r"""Filter skipping over camelCasedWords.
     This filter skips any words matching the following regular expression:
 
@@ -109,11 +125,8 @@ class CamelCasedWord(Filter):
     """
     _pattern = re.compile(r"^([a-z]+([\d]|[A-Z])(?:\w+)?)")
 
-    def _skip(self, word):
-        return bool(self._pattern.match(word))
 
-
-class SphinxDirectives(Filter):
+class SphinxDirectives(RegExFilter):
     r"""Filter skipping over Sphinx Directives.
     This filter skips any words matching the following regular expression:
 
@@ -124,11 +137,8 @@ class SphinxDirectives(Filter):
     # The final ` in the pattern is optional because enchant strips it out
     _pattern = re.compile(r"^(:([a-z]+)){1,2}:`([^`]+)(`)?")
 
-    def _skip(self, word):
-        return bool(self._pattern.match(word))
-
 
-class ForwardSlashChunkder(Chunker):
+class ForwardSlashChunker(Chunker):
     """
     This chunker allows splitting words like 'before/after' into 'before' and 'after'
     """
@@ -169,6 +179,23 @@ def _next(self):
         raise StopIteration()
 
 
+CODE_FLANKED_IN_BACKTICK_REGEX = re.compile(r"(\s|^)(`{1,2})([^`]+)(\2)([^`]|$)")
+
+
+def _strip_code_flanked_in_backticks(line: str) -> str:
+    """Alter line so code flanked in backticks is ignored.
+
+    Pyenchant automatically strips backticks when parsing tokens,
+    so this cannot be done at the individual filter level."""
+
+    def replace_code_but_leave_surrounding_characters(match_obj) -> str:
+        return match_obj.group(1) + match_obj.group(5)
+
+    return CODE_FLANKED_IN_BACKTICK_REGEX.sub(
+        replace_code_but_leave_surrounding_characters, line
+    )
+
+
 class SpellingChecker(BaseTokenChecker):
     """Check spelling in comments and docstrings"""
 
@@ -245,6 +272,15 @@ class SpellingChecker(BaseTokenChecker):
                 "help": "Limits count of emitted suggestions for spelling mistakes.",
             },
         ),
+        (
+            "spelling-ignore-comment-directives",
+            {
+                "default": "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:",
+                "type": "string",
+                "metavar": "<comma separated words>",
+                "help": "List of comma separated words that should be considered directives if they appear at the beginning of a comment and should not be checked.",
+            },
+        ),
     )
 
     def open(self):
@@ -264,6 +300,10 @@ def open(self):
         # "pylint" appears in comments in pylint pragmas.
         self.ignore_list.extend(["param", "pylint"])
 
+        self.ignore_comment_directive_list = [
+            w.strip() for w in self.config.spelling_ignore_comment_directives.split(",")
+        ]
+
         # Expand tilde to allow e.g. spelling-private-dict-file = ~/.pylintdict
         if self.config.spelling_private_dict_file:
             self.config.spelling_private_dict_file = os.path.expanduser(
@@ -283,12 +323,12 @@ def open(self):
 
         self.tokenizer = get_tokenizer(
             dict_name,
-            chunkers=[ForwardSlashChunkder],
+            chunkers=[ForwardSlashChunker],
             filters=[
                 EmailFilter,
                 URLFilter,
                 WikiWordFilter,
-                WordsWithDigigtsFilter,
+                WordsWithDigitsFilter,
                 WordsWithUnderscores,
                 CamelCasedWord,
                 SphinxDirectives,
@@ -308,9 +348,19 @@ def _check_spelling(self, msgid, line, line_num):
             initial_space = 0
         if line.strip().startswith("#") and "docstring" not in msgid:
             line = line.strip()[1:]
+            # A ``Filter`` cannot determine if the directive is at the beginning of a line,
+            #   nor determine if a colon is present or not (``pyenchant`` strips trailing colons),
+            #   so the directive is stripped from the line here instead.
+            for iter_directive in self.ignore_comment_directive_list:
+                if line.startswith(" " + iter_directive):
+                    line = line[(len(iter_directive) + 1) :]
+                    break
             starts_with_comment = True
         else:
             starts_with_comment = False
+
+        line = _strip_code_flanked_in_backticks(line)
+
         for word, word_start_at in self.tokenizer(line.strip()):
             word_start_at += initial_space
             lower_cased_word = word.casefold()

diff --git a/pylint/testutils/decorator.py b/pylint/testutils/decorator.py
@@ -7,17 +7,21 @@
 
 
 def set_config(**kwargs):
-    """Decorator for setting config values on a checker."""
+    """Decorator for setting config values on a checker.
+
+    Passing the args and kwargs back to the test function itself
+    allows this decorator to be used on parametrized test cases.
+    """
 
     def _wrapper(fun):
         @functools.wraps(fun)
-        def _forward(self):
+        def _forward(self, *args, **test_function_kwargs):
             for key, value in kwargs.items():
                 setattr(self.checker.config, key, value)
             if isinstance(self, CheckerTestCase):
                 # reopen checker in case, it may be interested in configuration change
                 self.checker.open()
-            fun(self)
+            fun(self, *args, **test_function_kwargs)
 
         return _forward
 

diff --git a/tests/checkers/unittest_spelling.py b/tests/checkers/unittest_spelling.py
@@ -37,7 +37,9 @@
         pass
 
 
-class TestSpellingChecker(CheckerTestCase):
+class TestSpellingChecker(CheckerTestCase):  # pylint:disable=too-many-public-methods
+    # This is a test case class, not sure why it would be relevant to have
+    #   this pylint rule enforced for test case classes.
     CHECKER_CLASS = spelling.SpellingChecker
 
     skip_on_missing_package_or_dict = pytest.mark.skipif(
@@ -303,6 +305,107 @@ def test_skip_sphinx_directives_2(self):
         ):
             self.checker.visit_classdef(stmt)
 
+    @skip_on_missing_package_or_dict
+    @set_config(spelling_dict=spell_dict)
+    @pytest.mark.parametrize(
+        ",".join(
+            (
+                "misspelled_portion_of_directive",
+                "second_portion_of_directive",
+                "description",
+            )
+        ),
+        (
+            ("fmt", ": on", "black directive to turn on formatting"),
+            ("fmt", ": off", "black directive to turn off formatting"),
+            ("noqa", "", "pycharm directive"),
+            ("noqa", ":", "flake8 / zimports directive"),
+            ("nosec", "", "bandit directive"),
+            ("isort", ":skip", "isort directive"),
+            ("mypy", ":", "mypy directive"),
+        ),
+    )
+    def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment(  # pylint:disable=unused-argument
+        # Having the extra description parameter allows the description
+        #   to show up in the pytest output as part of the test name
+        #   when running parametrized tests.
+        self,
+        misspelled_portion_of_directive,
+        second_portion_of_directive,
+        description,
+    ):
+        full_comment = f"# {misspelled_portion_of_directive}{second_portion_of_directive} {misspelled_portion_of_directive}"
+        with self.assertAddsMessages(
+            Message(
+                "wrong-spelling-in-comment",
+                line=1,
+                args=(
+                    misspelled_portion_of_directive,
+                    full_comment,
+                    f"  {'^'*len(misspelled_portion_of_directive)}",
+                    self._get_msg_suggestions(misspelled_portion_of_directive),
+                ),
+            )
+        ):
+            self.checker.process_tokens(_tokenize_str(full_comment))
+
+    @skip_on_missing_package_or_dict
+    @set_config(spelling_dict=spell_dict)
+    def test_skip_code_flanked_in_double_backticks(self):
+        full_comment = "# The function ``.qsize()`` .qsize()"
+        with self.assertAddsMessages(
+            Message(
+                "wrong-spelling-in-comment",
+                line=1,
+                args=(
+                    "qsize",
+                    full_comment,
+                    "                 ^^^^^",
+                    self._get_msg_suggestions("qsize"),
+                ),
+            )
+        ):
+            self.checker.process_tokens(_tokenize_str(full_comment))
+
+    @skip_on_missing_package_or_dict
+    @set_config(spelling_dict=spell_dict)
+    def test_skip_code_flanked_in_single_backticks(self):
+        full_comment = "# The function `.qsize()` .qsize()"
+        with self.assertAddsMessages(
+            Message(
+                "wrong-spelling-in-comment",
+                line=1,
+                args=(
+                    "qsize",
+                    full_comment,
+                    "                 ^^^^^",
+                    self._get_msg_suggestions("qsize"),
+                ),
+            )
+        ):
+            self.checker.process_tokens(_tokenize_str(full_comment))
+
+    @skip_on_missing_package_or_dict
+    @set_config(
+        spelling_dict=spell_dict,
+        spelling_ignore_comment_directives="newdirective:,noqa",
+    )
+    def test_skip_directives_specified_in_pylintrc(self):
+        full_comment = "# newdirective: do this newdirective"
+        with self.assertAddsMessages(
+            Message(
+                "wrong-spelling-in-comment",
+                line=1,
+                args=(
+                    "newdirective",
+                    full_comment,
+                    "          ^^^^^^^^^^^^",
+                    self._get_msg_suggestions("newdirective"),
+                ),
+            )
+        ):
+            self.checker.process_tokens(_tokenize_str(full_comment))
+
     @skip_on_missing_package_or_dict
     @set_config(spelling_dict=spell_dict)
     def test_handle_words_joined_by_forward_slash(self):