From 4e99c2578667b1dc9389f21e56651cec6c1736d7 Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 13:39:58 +0000 Subject: [PATCH 1/6] skipping spellcheck for tool directives in comments --- pylint/checkers/spelling.py | 43 +++++++++++++----- pylint/testutils/decorator.py | 6 ++- tests/checkers/unittest_spelling.py | 68 +++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index 95b46cda2a..dca7da3b51 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -17,6 +17,7 @@ # Copyright (c) 2020 Ganden Schaffner # Copyright (c) 2020 hippo91 # Copyright (c) 2020 Damien Baty +# Copyright (c) 2021 Eli Fine # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/master/COPYING @@ -26,6 +27,7 @@ import os import re import tokenize +from typing import Pattern from pylint.checkers import BaseTokenChecker from pylint.checkers.utils import check_messages @@ -79,7 +81,7 @@ def get_tokenizer( instr = " To make it work, install the 'python-enchant' package." -class WordsWithDigigtsFilter(Filter): +class WordsWithDigitsFilter(Filter): """Skips words with digits.""" def _skip(self, word): @@ -99,7 +101,18 @@ def _skip(self, word): return "_" in word -class CamelCasedWord(Filter): +class RegExFilter(Filter): + r"""Parent class for filters using regular expressions. + This filter skips any words the match the expression assigned to the class attribute `_pattern` + + """ + _pattern: Pattern[str] + + def _skip(self, word) -> bool: + return bool(self._pattern.match(word)) + + +class CamelCasedWord(RegExFilter): r"""Filter skipping over camelCasedWords. 
This filter skips any words matching the following regular expression: @@ -109,11 +122,8 @@ class CamelCasedWord(Filter): """ _pattern = re.compile(r"^([a-z]+([\d]|[A-Z])(?:\w+)?)") - def _skip(self, word): - return bool(self._pattern.match(word)) - -class SphinxDirectives(Filter): +class SphinxDirectives(RegExFilter): r"""Filter skipping over Sphinx Directives. This filter skips any words matching the following regular expression: @@ -124,11 +134,8 @@ class SphinxDirectives(Filter): # The final ` in the pattern is optional because enchant strips it out _pattern = re.compile(r"^(:([a-z]+)){1,2}:`([^`]+)(`)?") - def _skip(self, word): - return bool(self._pattern.match(word)) - -class ForwardSlashChunkder(Chunker): +class ForwardSlashChunker(Chunker): """ This chunker allows splitting words like 'before/after' into 'before' and 'after' """ @@ -283,15 +290,16 @@ def open(self): self.tokenizer = get_tokenizer( dict_name, - chunkers=[ForwardSlashChunkder], + chunkers=[ForwardSlashChunker], filters=[ EmailFilter, URLFilter, WikiWordFilter, - WordsWithDigigtsFilter, + WordsWithDigitsFilter, WordsWithUnderscores, CamelCasedWord, SphinxDirectives, + # BlackDirectives ], ) self.initialized = True @@ -308,6 +316,17 @@ def _check_spelling(self, msgid, line, line_num): initial_space = 0 if line.strip().startswith("#") and "docstring" not in msgid: line = line.strip()[1:] + for iter_directive in ( + "fmt: on", + "fmt: off", + "noqa:", + "noqa", + "nosec", + "isort:skip", + ): + if line.startswith(" " + iter_directive): + line = line[(len(iter_directive) + 1) :] + break starts_with_comment = True else: starts_with_comment = False diff --git a/pylint/testutils/decorator.py b/pylint/testutils/decorator.py index 3b70867cb9..3fb5f190aa 100644 --- a/pylint/testutils/decorator.py +++ b/pylint/testutils/decorator.py @@ -11,13 +11,15 @@ def set_config(**kwargs): def _wrapper(fun): @functools.wraps(fun) - def _forward(self): + def _forward(self, *args, **test_function_kwargs): for key, 
value in kwargs.items(): setattr(self.checker.config, key, value) if isinstance(self, CheckerTestCase): # reopen checker in case, it may be interested in configuration change self.checker.open() - fun(self) + fun( + self, *args, **test_function_kwargs + ) # Passing the args and kwargs back to the test function itself allows this decorator to be used on parametrized test cases return _forward diff --git a/tests/checkers/unittest_spelling.py b/tests/checkers/unittest_spelling.py index c14062d2dc..dec6b19ee5 100644 --- a/tests/checkers/unittest_spelling.py +++ b/tests/checkers/unittest_spelling.py @@ -303,6 +303,74 @@ def test_skip_sphinx_directives_2(self): ): self.checker.visit_classdef(stmt) + # @skip_on_missing_package_or_dict + # # @set_config(spelling_dict=spell_dict) + # @pytest.mark.parametrize( + # "num_files,num_jobs,num_checkers", + # [ + # (1, 2, 1), + # (1, 2, 2), + # (1, 2, 3), + # (2, 2, 1), + # (2, 2, 2), + # (2, 2, 3), + # (3, 2, 1), + # (3, 2, 2), + # (3, 2, 3), + # (3, 1, 1), + # (3, 1, 2), + # (3, 1, 3), + # (3, 5, 1), + # (3, 5, 2), + # (3, 5, 3), + # (10, 2, 1), + # (10, 2, 2), + # (10, 2, 3), + # (2, 10, 1), + # (2, 10, 2), + # (2, 10, 3), + # ], + # ) + # def test_compare_workers_to_single_proc(self, num_files, num_jobs, num_checkers): + # assert True + + @skip_on_missing_package_or_dict + @set_config(spelling_dict=spell_dict) + @pytest.mark.parametrize( + ",".join( + ( + "misspelled_portion_of_directive", + "second_portion_of_directive", + "description", + ) + ), + ( + ("fmt", ": on", "black directive to turn on formatting"), + ("fmt", ": off", "black directive to turn off formatting"), + ("noqa", "", "pycharm directive"), + ("noqa", ":", "flake8 / zimports directive"), + ("nosec", "", "bandit directive"), + ("isort", ":skip", "isort directive"), + ), + ) + def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment( # pylint:disable=unused-argument # Having the extra description parameter 
allows the description to show up in the pytest output as part of the test name when running parametrized tests + self, misspelled_portion_of_directive, second_portion_of_directive, description + ): + full_comment = f"# {misspelled_portion_of_directive}{second_portion_of_directive} {misspelled_portion_of_directive}" + with self.assertAddsMessages( + Message( + "wrong-spelling-in-comment", + line=1, + args=( + misspelled_portion_of_directive, + full_comment, + f" {'^'*len(misspelled_portion_of_directive)}", + self._get_msg_suggestions(misspelled_portion_of_directive), + ), + ) + ): + self.checker.process_tokens(_tokenize_str(full_comment)) + @skip_on_missing_package_or_dict @set_config(spelling_dict=spell_dict) def test_handle_words_joined_by_forward_slash(self): From 9b72baaf75a222874b3e7884e04a2a37e4697037 Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 14:38:16 +0000 Subject: [PATCH 2/6] skipping spellcheck for code flanked in backticks --- ChangeLog | 10 +++- pylint/checkers/spelling.py | 24 +++++++++- tests/checkers/unittest_spelling.py | 72 ++++++++++++++++------------- 3 files changed, 72 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index eaf3fb6d11..62d3d62e27 100644 --- a/ChangeLog +++ b/ChangeLog @@ -63,6 +63,14 @@ Release date: Undefined * Don't show ``DuplicateBasesError`` for attribute access +* Allow code flanked in backticks to be skipped by spellchecker + + Closes #4319 + +* Allow Python tool directives (for black, flake8, zimports, isort, mypy, bandit, pycharm) at beginning of comments to be skipped by spellchecker + + Closes #4320 + What's New in Pylint 2.7.4? =========================== @@ -298,7 +306,7 @@ Release date: 2021-02-21 Close #2738 -* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, bracket, comman, etc.) +* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. 
parentheses, bracket, comma, etc.) * Improve lint message for ``singleton-comparison`` with bools diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index dca7da3b51..b08ec772a6 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -103,7 +103,7 @@ def _skip(self, word): class RegExFilter(Filter): r"""Parent class for filters using regular expressions. - This filter skips any words the match the expression assigned to the class attribute `_pattern` + This filter skips any words the match the expression assigned to the class attribute ``_pattern`` """ _pattern: Pattern[str] @@ -176,6 +176,24 @@ def _next(self): raise StopIteration() +CODE_FLANKED_IN_BACKTICK_REGEX = re.compile(r"(\s|^)(`{1,2})([^`]+)(\2)([^`]|$)") + + +def _strip_code_flanked_in_backticks(line: str) -> str: + """Alter line so code flanked in backticks is ignored. + + Pyenchant automatically strips backticks when parsing tokens, so this cannot be done at the individual filter level. 
+ + """ + + def replace_code_but_leave_surrounding_characters(match_obj) -> str: + return match_obj.group(1) + match_obj.group(5) + + return CODE_FLANKED_IN_BACKTICK_REGEX.sub( + replace_code_but_leave_surrounding_characters, line + ) + + class SpellingChecker(BaseTokenChecker): """Check spelling in comments and docstrings""" @@ -323,6 +341,7 @@ def _check_spelling(self, msgid, line, line_num): "noqa", "nosec", "isort:skip", + "mypy:", ): if line.startswith(" " + iter_directive): line = line[(len(iter_directive) + 1) :] @@ -330,6 +349,9 @@ def _check_spelling(self, msgid, line, line_num): starts_with_comment = True else: starts_with_comment = False + + line = _strip_code_flanked_in_backticks(line) + for word, word_start_at in self.tokenizer(line.strip()): word_start_at += initial_space lower_cased_word = word.casefold() diff --git a/tests/checkers/unittest_spelling.py b/tests/checkers/unittest_spelling.py index dec6b19ee5..0d946bc847 100644 --- a/tests/checkers/unittest_spelling.py +++ b/tests/checkers/unittest_spelling.py @@ -37,7 +37,9 @@ pass -class TestSpellingChecker(CheckerTestCase): +class TestSpellingChecker( + CheckerTestCase +): # pylint:disable=too-many-public-methods # This is a test case class, not sure why it would be relevant to have this pylint rule enforced for test case classes CHECKER_CLASS = spelling.SpellingChecker skip_on_missing_package_or_dict = pytest.mark.skipif( @@ -303,37 +305,6 @@ def test_skip_sphinx_directives_2(self): ): self.checker.visit_classdef(stmt) - # @skip_on_missing_package_or_dict - # # @set_config(spelling_dict=spell_dict) - # @pytest.mark.parametrize( - # "num_files,num_jobs,num_checkers", - # [ - # (1, 2, 1), - # (1, 2, 2), - # (1, 2, 3), - # (2, 2, 1), - # (2, 2, 2), - # (2, 2, 3), - # (3, 2, 1), - # (3, 2, 2), - # (3, 2, 3), - # (3, 1, 1), - # (3, 1, 2), - # (3, 1, 3), - # (3, 5, 1), - # (3, 5, 2), - # (3, 5, 3), - # (10, 2, 1), - # (10, 2, 2), - # (10, 2, 3), - # (2, 10, 1), - # (2, 10, 2), - # (2, 10, 3), - # ], - # 
) - # def test_compare_workers_to_single_proc(self, num_files, num_jobs, num_checkers): - # assert True - @skip_on_missing_package_or_dict @set_config(spelling_dict=spell_dict) @pytest.mark.parametrize( @@ -351,6 +322,7 @@ def test_skip_sphinx_directives_2(self): ("noqa", ":", "flake8 / zimports directive"), ("nosec", "", "bandit directive"), ("isort", ":skip", "isort directive"), + ("mypy", ":", "mypy directive"), ), ) def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment( # pylint:disable=unused-argument # Having the extra description parameter allows the description to show up in the pytest output as part of the test name when running parametrized tests @@ -371,6 +343,42 @@ def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_ ): self.checker.process_tokens(_tokenize_str(full_comment)) + @skip_on_missing_package_or_dict + @set_config(spelling_dict=spell_dict) + def test_skip_code_flanked_in_double_backticks(self): + full_comment = "# The function ``.qsize()`` .qsize()" + with self.assertAddsMessages( + Message( + "wrong-spelling-in-comment", + line=1, + args=( + "qsize", + full_comment, + " ^^^^^", + self._get_msg_suggestions("qsize"), + ), + ) + ): + self.checker.process_tokens(_tokenize_str(full_comment)) + + @skip_on_missing_package_or_dict + @set_config(spelling_dict=spell_dict) + def test_skip_code_flanked_in_single_backticks(self): + full_comment = "# The function `.qsize()` .qsize()" + with self.assertAddsMessages( + Message( + "wrong-spelling-in-comment", + line=1, + args=( + "qsize", + full_comment, + " ^^^^^", + self._get_msg_suggestions("qsize"), + ), + ) + ): + self.checker.process_tokens(_tokenize_str(full_comment)) + @skip_on_missing_package_or_dict @set_config(spelling_dict=spell_dict) def test_handle_words_joined_by_forward_slash(self): From d2c76e94935eec75dbe0e088f650e07a826adffe Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 14:53:40 +0000 
Subject: [PATCH 3/6] removing extraneous line and adding clarifying comment --- pylint/checkers/spelling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index b08ec772a6..341352d384 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -317,7 +317,6 @@ def open(self): WordsWithUnderscores, CamelCasedWord, SphinxDirectives, - # BlackDirectives ], ) self.initialized = True @@ -334,6 +333,7 @@ def _check_spelling(self, msgid, line, line_num): initial_space = 0 if line.strip().startswith("#") and "docstring" not in msgid: line = line.strip()[1:] + # A ``Filter`` cannot determine if the directive is at the beginning of a line, nor determine if a colon is present or not (``pyenchant`` strips trailing colons). So implementing this here. for iter_directive in ( "fmt: on", "fmt: off", From 30175c3ef2295e1653f483b2d4e5747ff99a5f9c Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 13:08:58 -0400 Subject: [PATCH 4/6] Update pylint/checkers/spelling.py Co-authored-by: Pierre Sassoulas --- pylint/checkers/spelling.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index 341352d384..bb2e30011c 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -182,9 +182,8 @@ def _next(self): def _strip_code_flanked_in_backticks(line: str) -> str: """Alter line so code flanked in backticks is ignored. - Pyenchant automatically strips backticks when parsing tokens, so this cannot be done at the individual filter level. 
+ Pyenchant automatically strips backticks when parsing tokens, + so this cannot be done at the individual filter level.""" def replace_code_but_leave_surrounding_characters(match_obj) -> str: return match_obj.group(1) + match_obj.group(5) From ce59966a522eb0ca965a410d8fef55db3ebc8faf Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 17:27:53 +0000 Subject: [PATCH 5/6] obtaining list of comment directives to ignore from pylintrc --- pylint/checkers/spelling.py | 34 +++++++++++++++++++---------- pylint/testutils/decorator.py | 10 +++++---- tests/checkers/unittest_spelling.py | 21 ++++++++++++++++++ 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index bb2e30011c..a1a9f6825b 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -102,10 +102,13 @@ def _skip(self, word): class RegExFilter(Filter): - r"""Parent class for filters using regular expressions. - This filter skips any words the match the expression assigned to the class attribute ``_pattern`` + """Parent class for filters using regular expressions. + + This filter skips any words that match the expression + assigned to the class attribute ``_pattern``. """ + _pattern: Pattern[str] def _skip(self, word) -> bool: @@ -269,6 +272,15 @@ class SpellingChecker(BaseTokenChecker): "help": "Limits count of emitted suggestions for spelling mistakes.", }, ), + ( + "spelling-ignore-comment-directives", + { + "default": "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:", + "type": "string", + "metavar": "", + "help": "List of comma separated words that should not be considered directives if they appear and the beginning of a comment and should not be checked.", + }, + ), ) def open(self): @@ -288,6 +300,10 @@ def open(self): # "pylint" appears in comments in pylint pragmas. 
self.ignore_list.extend(["param", "pylint"]) + self.ignore_comment_directive_list = [ + w.strip() for w in self.config.spelling_ignore_comment_directives.split(",") + ] + # Expand tilde to allow e.g. spelling-private-dict-file = ~/.pylintdict if self.config.spelling_private_dict_file: self.config.spelling_private_dict_file = os.path.expanduser( @@ -332,16 +348,10 @@ def _check_spelling(self, msgid, line, line_num): initial_space = 0 if line.strip().startswith("#") and "docstring" not in msgid: line = line.strip()[1:] - # A ``Filter`` cannot determine if the directive is at the beginning of a line, nor determine if a colon is present or not (``pyenchant`` strips trailing colons). So implementing this here. - for iter_directive in ( - "fmt: on", - "fmt: off", - "noqa:", - "noqa", - "nosec", - "isort:skip", - "mypy:", - ): + # A ``Filter`` cannot determine if the directive is at the beginning of a line, + # nor determine if a colon is present or not (``pyenchant`` strips trailing colons). + # So implementing this here. + for iter_directive in self.ignore_comment_directive_list: if line.startswith(" " + iter_directive): line = line[(len(iter_directive) + 1) :] break diff --git a/pylint/testutils/decorator.py b/pylint/testutils/decorator.py index 3fb5f190aa..5e5077e7bc 100644 --- a/pylint/testutils/decorator.py +++ b/pylint/testutils/decorator.py @@ -7,7 +7,11 @@ def set_config(**kwargs): - """Decorator for setting config values on a checker.""" + """Decorator for setting config values on a checker. + + Passing the args and kwargs back to the test function itself + allows this decorator to be used on parametrized test cases. 
+ """ def _wrapper(fun): @functools.wraps(fun) @@ -17,9 +21,7 @@ def _forward(self, *args, **test_function_kwargs): if isinstance(self, CheckerTestCase): # reopen checker in case, it may be interested in configuration change self.checker.open() - fun( - self, *args, **test_function_kwargs - ) # Passing the args and kwargs back to the test function itself allows this decorator to be used on parametrized test cases + fun(self, *args, **test_function_kwargs) return _forward diff --git a/tests/checkers/unittest_spelling.py b/tests/checkers/unittest_spelling.py index 0d946bc847..d5011dba0e 100644 --- a/tests/checkers/unittest_spelling.py +++ b/tests/checkers/unittest_spelling.py @@ -379,6 +379,27 @@ def test_skip_code_flanked_in_single_backticks(self): ): self.checker.process_tokens(_tokenize_str(full_comment)) + @skip_on_missing_package_or_dict + @set_config( + spelling_dict=spell_dict, + spelling_ignore_comment_directives="newdirective:,noqa", + ) + def test_skip_directives_specified_in_pylintrc(self): + full_comment = "# newdirective: do this newdirective" + with self.assertAddsMessages( + Message( + "wrong-spelling-in-comment", + line=1, + args=( + "newdirective", + full_comment, + " ^^^^^^^^^^^^", + self._get_msg_suggestions("newdirective"), + ), + ) + ): + self.checker.process_tokens(_tokenize_str(full_comment)) + @skip_on_missing_package_or_dict @set_config(spelling_dict=spell_dict) def test_handle_words_joined_by_forward_slash(self): From d75cb293b7c6eb2cac121674475eddc03828bb42 Mon Sep 17 00:00:00 2001 From: Eli Fine Date: Fri, 9 Apr 2021 17:30:44 +0000 Subject: [PATCH 6/6] fixing comments to break into multiple lines --- pylint/checkers/spelling.py | 2 +- tests/checkers/unittest_spelling.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index a1a9f6825b..41e53afbe7 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -278,7 +278,7 @@ class 
SpellingChecker(BaseTokenChecker): "default": "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:", "type": "string", "metavar": "", - "help": "List of comma separated words that should not be considered directives if they appear and the beginning of a comment and should not be checked.", + "help": "List of comma separated words that should be considered directives if they appear and the beginning of a comment and should not be checked.", }, ), ) diff --git a/tests/checkers/unittest_spelling.py b/tests/checkers/unittest_spelling.py index d5011dba0e..2d48670cb3 100644 --- a/tests/checkers/unittest_spelling.py +++ b/tests/checkers/unittest_spelling.py @@ -37,9 +37,9 @@ pass -class TestSpellingChecker( - CheckerTestCase -): # pylint:disable=too-many-public-methods # This is a test case class, not sure why it would be relevant to have this pylint rule enforced for test case classes +class TestSpellingChecker(CheckerTestCase): # pylint:disable=too-many-public-methods + # This is a test case class, not sure why it would be relevant to have + # this pylint rule enforced for test case classes. CHECKER_CLASS = spelling.SpellingChecker skip_on_missing_package_or_dict = pytest.mark.skipif( @@ -325,8 +325,14 @@ def test_skip_sphinx_directives_2(self): ("mypy", ":", "mypy directive"), ), ) - def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment( # pylint:disable=unused-argument # Having the extra description parameter allows the description to show up in the pytest output as part of the test name when running parametrized tests - self, misspelled_portion_of_directive, second_portion_of_directive, description + def test_skip_tool_directives_at_beginning_of_comments_but_still_raise_error_if_directive_appears_later_in_comment( # pylint:disable=unused-argument + # Having the extra description parameter allows the description + # to show up in the pytest output as part of the test name + # when running parametrized tests. 
+ self, + misspelled_portion_of_directive, + second_portion_of_directive, + description, ): full_comment = f"# {misspelled_portion_of_directive}{second_portion_of_directive} {misspelled_portion_of_directive}" with self.assertAddsMessages(