akaihola · akaihola · Sep 4, 2022 · Sep 2, 2022 · Sep 2, 2022 · Sep 2, 2022
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -16,12 +16,13 @@ Fixed
 -----
 - ``darker --revision=a..b .`` now works since the repository root is now always
   considered to have existed in all historical commits.
-- Ignore linter lines which refer to files outside the common root of paths on the
-  command line. Fixes a failure when Pylint notifies about obsolete options in
-  ``.pylintrc``.
+- Ignore linter lines which refer to non-Python files or files outside the common root
+  of paths on the command line. Fixes a failure when Pylint notifies about obsolete
+  options in ``.pylintrc``.
 - For linting Darker's own code base, require Pylint 2.6.0 or newer. This avoids the
   need to skip the obsolete ``bad-continuation`` check now removed from Pylint.
 - Fix linter output parsing for full Windows paths which include a drive letter.
+- Stricter rules for linter output parsing.
 
 
 1.5.0_ - 2022-04-23

diff --git a/src/darker/linting.py b/src/darker/linting.py
@@ -31,21 +31,71 @@
 logger = logging.getLogger(__name__)
 
 
+def _strict_nonneg_int(text: str) -> int:
+    """Strict parsing of strings to non-negative integers
+
+    Allow no leading or trailing whitespace, nor plus or minus signs.
+
+    :param text: The string to convert
+    :raises ValueError: Raises if the string has any non-numeric characters
+    :return: [description]
+    :rtype: [type]
+    """
+    if text.strip("+-\t ") != text:
+        raise ValueError(r"invalid literal for int() with base 10: {text}")
+    return int(text)
+
+
 def _parse_linter_line(line: str, root: Path) -> Tuple[Path, int, str, str]:
-    # Parse an error/note line.
-    # Given: line == "dir/file.py:123: error: Foo\n"
-    # Sets: path = Path("abs/path/to/dir/file.py:123"
-    #       linenum = 123
-    #       description = "error: Foo"
+    """Parse one line of linter output
+
+    Only parses lines with
+    - a file path (without leading-trailing whitespace),
+    - a non-negative line number (without leading/trailing whitespace),
+    - optionally a column number (without leading/trailing whitespace), and
+    - a description.
+
+    Examples of successfully parsed lines::
+
+        path/to/file.py:42: Description
+        path/to/file.py:42:5: Description
+
+    Given a root of ``Path("path/")``, these would be parsed into::
+
+        (Path("to/file.py"), 42, "path/to/file.py:42:", "Description")
+        (Path("to/file.py"), 42, "path/to/file.py:42:5:", "Description")
+
+    For all other lines, a dummy entry is returned: an empty path, zero as the line
+    number, an empty location string and an empty description. Such lines should be
+    simply ignored, since many linters display supplementary information insterspersed
+    with the actual linting notifications.
+
+    :param line: The linter output line to parse. May have a trailing newline.
+    :param root: The root directory to resolve full file paths against
+    :return: A 4-tuple of
+             - a ``root``-relative file path,
+             - the line number,
+             - the path and location string, and
+             - the description.
+
+    """
     try:
-        location, description = line[:-1].split(": ", 1)
-        path_str, linenum_str, *rest = location.split(":")
-        linenum = int(linenum_str)
+        location, description = line.rstrip().split(": ", 1)
+        if location[1:3] == ":\\":
+            # Absolute Windows paths need special handling. Separate out the ``C:`` (or
+            # similar), then split by colons, and finally re-insert the ``C:``.
+            path_in_drive, linenum_str, *rest = location[2:].split(":")
+            path_str = f"{location[:2]}{path_in_drive}"
+        else:
+            path_str, linenum_str, *rest = location.split(":")
+        if path_str.strip() != path_str:
+            raise ValueError(r"Filename {path_str!r} has leading/trailing whitespace")
+        linenum = _strict_nonneg_int(linenum_str)
         if len(rest) > 1:
-            raise ValueError("Too many colon-separated tokens")
+            raise ValueError("Too many colon-separated tokens in {location!r}")
         if len(rest) == 1:
             # Make sure it column looks like an int on "<path>:<linenum>:<column>"
-            _column = int(rest[0])  # noqa: F841
+            _column = _strict_nonneg_int(rest[0])  # noqa: F841
     except ValueError:
         # Encountered a non-parsable line which doesn't express a linting error.
         # For example, on Mypy:
@@ -56,8 +106,12 @@ def _parse_linter_line(line: str, root: Path) -> Tuple[Path, int, str, str]:
     path_from_cwd = Path(path_str).absolute()
     try:
         path_in_repo = path_from_cwd.relative_to(root)
-    except ValueError as exc_info:
-        logger.debug("Unparsable linter output: %s (%s)", line[:-1], exc_info)
+    except ValueError:
+        logger.warning(
+            "Linter message for a file %s outside requested directory %s",
+            path_from_cwd,
+            root,
+        )
         return Path(), 0, "", ""
     return path_in_repo, linenum, location + ":", description
 
@@ -135,6 +189,13 @@ def run_linter(
                 or linter_error_linenum == 0
             ):
                 continue
+            if path_in_repo.suffix != ".py":
+                logger.warning(
+                    "Linter message for a non-Python file: %s %s",
+                    location,
+                    description,
+                )
+                continue
             try:
                 edited_linenums = edited_linenums_differ.compare_revisions(
                     path_in_repo, context_lines=0

diff --git a/src/darker/tests/conftest.py b/src/darker/tests/conftest.py
@@ -1,9 +1,10 @@
 """Configuration and fixtures for the Pytest based test suite"""
 
 import os
+import re
 from pathlib import Path
 from subprocess import check_call  # nosec
-from typing import Dict, Union
+from typing import Dict, Iterable, List, Union
 
 import pytest
 from black import find_project_root as black_find_project_root
@@ -84,6 +85,21 @@ def create_branch(self, new_branch: str, start_point: str) -> None:
         """Fixture method to create and check out new branch at given starting point"""
         self._run("checkout", "-b", new_branch, start_point)
 
+    def expand_root(self, lines: Iterable[str]) -> List[str]:
+        """Replace "{root/<path>}" in strings with the path in the temporary Git repo
+
+        This is used to generate expected strings corresponding to locations of files in
+        the temporary Git repository.
+
+        :param lines: The lines of text to process
+        :return: Given lines with paths processed
+
+        """
+        return [
+            re.sub(r"\{root/(.*?)\}", lambda m: str(self.root / str(m.group(1))), line)
+            for line in lines
+        ]
+
 
 @pytest.fixture
 def git_repo(tmp_path, monkeypatch):

diff --git a/src/darker/tests/test_linting.py b/src/darker/tests/test_linting.py
@@ -2,7 +2,8 @@
 
 """Unit tests for :mod:`darker.linting`"""
 
-import re
+import os
+import sys
 from pathlib import Path
 from textwrap import dedent
 from unittest.mock import call, patch
@@ -13,27 +14,83 @@
 from darker.git import WORKTREE, RevisionRange
 from darker.tests.helpers import raises_if_exception
 
+SKIP_ON_WINDOWS = [pytest.mark.skip] if sys.platform.startswith("win") else []
+SKIP_ON_UNIX = [] if sys.platform.startswith("win") else [pytest.mark.skip]
+
 
 @pytest.mark.kwparametrize(
     dict(
-        line="module.py:42: Description\n",
-        expect=(Path("module.py"), 42, "module.py:42:", "Description"),
+        line="module.py:42: Just a line number\n",
+        expect=(Path("module.py"), 42, "module.py:42:", "Just a line number"),
+    ),
+    dict(
+        line="module.py:42:5: With column  \n",
+        expect=(Path("module.py"), 42, "module.py:42:5:", "With column"),
+    ),
+    dict(
+        line="{git_root_absolute}{sep}mod.py:42: Full path\n",
+        expect=(
+            Path("mod.py"),
+            42,
+            "{git_root_absolute}{sep}mod.py:42:",
+            "Full path",
+        ),
+    ),
+    dict(
+        line="{git_root_absolute}{sep}mod.py:42:5: Full path with column\n",
+        expect=(
+            Path("mod.py"),
+            42,
+            "{git_root_absolute}{sep}mod.py:42:5:",
+            "Full path with column",
+        ),
     ),
     dict(
-        line="module.py:42:5: Description\n",
-        expect=(Path("module.py"), 42, "module.py:42:5:", "Description"),
+        line="mod.py:42: 123 digits start the description\n",
+        expect=(Path("mod.py"), 42, "mod.py:42:", "123 digits start the description"),
     ),
-    dict(line="no-linenum.py: Description\n", expect=(Path(), 0, "", "")),
-    dict(line="mod.py:invalid-linenum:5: Description\n", expect=(Path(), 0, "", "")),
+    dict(
+        line="mod.py:42:    indented description\n",
+        expect=(Path("mod.py"), 42, "mod.py:42:", "   indented description"),
+    ),
+    dict(
+        line="mod.py:42:5:    indented description\n",
+        expect=(Path("mod.py"), 42, "mod.py:42:5:", "   indented description"),
+    ),
+    dict(
+        line="nonpython.txt:5: Non-Python file\n",
+        expect=(Path("nonpython.txt"), 5, "nonpython.txt:5:", "Non-Python file"),
+    ),
+    dict(line="mod.py: No line number\n", expect=(Path(), 0, "", "")),
+    dict(line="mod.py:foo:5: Invalid line number\n", expect=(Path(), 0, "", "")),
+    dict(line="mod.py:42:bar: Invalid column\n", expect=(Path(), 0, "", "")),
+    dict(line="/outside/mod.py:5: Outside the repo\n", expect=(Path(), 0, "", "")),
     dict(line="invalid linter output\n", expect=(Path(), 0, "", "")),
+    dict(line=" leading:42: whitespace\n", expect=(Path(), 0, "", "")),
+    dict(line=" leading:42:5 whitespace and column\n", expect=(Path(), 0, "", "")),
+    dict(line="trailing :42: filepath whitespace\n", expect=(Path(), 0, "", "")),
+    dict(line="leading: 42: linenum whitespace\n", expect=(Path(), 0, "", "")),
+    dict(line="trailing:42 : linenum whitespace\n", expect=(Path(), 0, "", "")),
+    dict(line="plus:+42: before linenum\n", expect=(Path(), 0, "", "")),
+    dict(line="minus:-42: before linenum\n", expect=(Path(), 0, "", "")),
+    dict(line="plus:42:+5 before column\n", expect=(Path(), 0, "", "")),
+    dict(line="minus:42:-5 before column\n", expect=(Path(), 0, "", "")),
 )
 def test_parse_linter_line(git_repo, monkeypatch, line, expect):
     """Linter output is parsed correctly"""
     monkeypatch.chdir(git_repo.root)
+    root_abs = git_repo.root.absolute()
+    line_expanded = line.format(git_root_absolute=root_abs, sep=os.sep)
 
-    result = linting._parse_linter_line(line, git_repo.root)
+    result = linting._parse_linter_line(line_expanded, git_repo.root)
 
-    assert result == expect
+    expect_expanded = (
+        expect[0],
+        expect[1],
+        expect[2].format(git_root_absolute=root_abs, sep=os.sep),
+        expect[3],
+    )
+    assert result == expect_expanded
 
 
 @pytest.mark.kwparametrize(
@@ -130,6 +187,38 @@ def test_check_linter_output():
         expect_output=[],
         expect_log=["WARNING Missing file missing.py from echo missing.py:1:"],
     ),
+    dict(
+        _descr="Linter message for a non-Python file is ignored with a warning",
+        paths=["one.py"],
+        location="nonpython.txt:1:",
+        expect_output=[],
+        expect_log=[
+            "WARNING Linter message for a non-Python file: "
+            "nonpython.txt:1: {root/one.py}"
+        ],
+    ),
+    dict(
+        _descr="Message for file outside common root is ignored with a warning (Unix)",
+        paths=["one.py"],
+        location="/elsewhere/mod.py:1:",
+        expect_output=[],
+        expect_log=[
+            "WARNING Linter message for a file /elsewhere/mod.py "
+            "outside requested directory {root/}"
+        ],
+        marks=SKIP_ON_WINDOWS,
+    ),
+    dict(
+        _descr="Message for file outside common root is ignored with a warning (Win)",
+        paths=["one.py"],
+        location="C:\\elsewhere\\mod.py:1:",
+        expect_output=[],
+        expect_log=[
+            "WARNING Linter message for a file C:\\elsewhere\\mod.py "
+            "outside requested directory {root/}"
+        ],
+        marks=SKIP_ON_UNIX,
+    ),
 )
 def test_run_linter(
     git_repo, capsys, caplog, _descr, paths, location, expect_output, expect_log
@@ -147,7 +236,9 @@ def test_run_linter(
           test.py:1: git-repo-root/one.py git-repo-root/two.py
 
     """
-    src_paths = git_repo.add({"test.py": "1\n2\n"}, commit="Initial commit")
+    src_paths = git_repo.add(
+        {"test.py": "1\n2\n", "nonpython.txt": "hello\n"}, commit="Initial commit"
+    )
     src_paths["test.py"].write_bytes(b"one\n2\n")
     cmdline = f"echo {location}"
     revrange = RevisionRange("HEAD", ":WORKTREE:")
@@ -160,12 +251,9 @@ def test_run_linter(
     # by checking standard output from the our `echo` "linter".
     # The test cases also verify that only linter reports on modified lines are output.
     result = capsys.readouterr().out.splitlines()
-    assert result == [
-        re.sub(r"\{root/(.*?)\}", lambda m: str(git_repo.root / m.group(1)), line)
-        for line in expect_output
-    ]
+    assert result == git_repo.expand_root(expect_output)
     logs = [f"{record.levelname} {record.message}" for record in caplog.records]
-    assert logs == expect_log
+    assert logs == git_repo.expand_root(expect_log)
 
 
 def test_run_linter_non_worktree():