diff --git a/CHANGES.rst b/CHANGES.rst index 4656aaf9f..e61c22f0e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,6 +34,9 @@ Fixed the virtualenv. - Allow more time to pass when checking file modification times in a unit test. Windows tests on GitHub are sometimes really slow. +- Multiline strings are now always reformatted completely even if just a part + was modified by the user and reformatted by Black. This prevents the + "back-and-forth indent" symptom. 1.4.1_ - 2022-02-17 diff --git a/src/darker/diff.py b/src/darker/diff.py index 279e22b50..646ae2723 100644 --- a/src/darker/diff.py +++ b/src/darker/diff.py @@ -66,8 +66,9 @@ import logging from difflib import SequenceMatcher -from typing import Generator, List, Tuple +from typing import Generator, List, Sequence, Tuple +from darker.multiline_strings import find_overlap from darker.utils import DiffChunk, TextDocument logger = logging.getLogger(__name__) @@ -109,25 +110,51 @@ def _validate_opcodes(opcodes: List[Tuple[str, int, int, int, int]]) -> None: def opcodes_to_edit_linenums( - opcodes: List[Tuple[str, int, int, int, int]], context_lines: int + opcodes: List[Tuple[str, int, int, int, int]], + context_lines: int, + multiline_string_ranges: Sequence[Tuple[int, int]], ) -> Generator[int, None, None]: """Convert diff opcodes to line numbers of edited lines in the destination file - :param opcodes: The diff opcodes to convert + On top of a straight mapping from line ranges to individual line numbers, this + function extends each diff opcode line range + - upwards and downwards for as many lines as determined by ``context_lines`` + - to make sure the range covers any multiline strings completely + + :param opcodes: The diff opcodes to convert. 0-based, end-exclusive. :param context_lines: The number of lines before and after an edited line to mark edited as well + :param multiline_string_ranges: Line ranges of multi-line strings. 1-based, + end-exclusive. + :return: Generates a list of integer 1-based line numbers """ if not opcodes: return _validate_opcodes(opcodes) + + # Calculate the last line number beyond which we won't extend with extra context + # lines + _tag, _i1, _i2, _j1, last_opcode_end = opcodes[-1] + _, last_multiline_string_end = ( + multiline_string_ranges[-1] if multiline_string_ranges else (None, 0) + ) + lastline = max(last_opcode_end + 1, last_multiline_string_end) + prev_chunk_end = 1 - _tag, _i1, _i2, _j1, end = opcodes[-1] for tag, _i1, _i2, j1, j2 in opcodes: - if tag != "equal": - chunk_end = min(j2 + 1 + context_lines, end + 1) - yield from range(max(j1 + 1 - context_lines, prev_chunk_end), chunk_end) - prev_chunk_end = chunk_end + if tag == "equal": + continue + chunk_start = j1 + 1 - context_lines + chunk_end = j2 + 1 + context_lines + multiline_string_range = find_overlap( + chunk_start, chunk_end, multiline_string_ranges + ) + if multiline_string_range: + chunk_start = min(chunk_start, multiline_string_range[0]) + chunk_end = max(chunk_end, multiline_string_range[1]) + yield from range(max(chunk_start, prev_chunk_end), min(chunk_end, lastline)) + prev_chunk_end = chunk_end def opcodes_to_chunks( diff --git a/src/darker/git.py b/src/darker/git.py index 68b709980..75ff318c4 100644 --- a/src/darker/git.py +++ b/src/darker/git.py @@ -12,6 +12,7 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, overload from darker.diff import diff_and_get_opcodes, opcodes_to_edit_linenums +from darker.multiline_strings import get_multiline_string_ranges from darker.utils import GIT_DATEFORMAT, TextDocument logger = logging.getLogger(__name__) @@ -377,7 +378,12 @@ def git_get_modified_python_files( def _revision_vs_lines( root: Path, path_in_repo: Path, rev1: str, content: TextDocument, context_lines: int ) -> List[int]: - """For file `path_in_repo`, return changed line numbers from given revision + """Return changed line numbers between the given revision and given text content + + The revision range was provided when this `EditedLinenumsDiffer` object was + created. Content for the given `path_in_repo` at `rev1` of that revision is + taken from the Git repository, and the provided text content is compared against + that historical version. The actual implementation is here instead of in `EditedLinenumsDiffer._revision_vs_lines` so it is accessibe both as a method and as @@ -393,7 +399,10 @@ def _revision_vs_lines( """ old = git_get_content_at_revision(path_in_repo, rev1, root) edited_opcodes = diff_and_get_opcodes(old, content) - return list(opcodes_to_edit_linenums(edited_opcodes, context_lines)) + multiline_string_ranges = get_multiline_string_ranges(content) + return list( + opcodes_to_edit_linenums(edited_opcodes, context_lines, multiline_string_ranges) + ) @lru_cache(maxsize=1) @@ -455,7 +464,12 @@ def compare_revisions(self, path_in_repo: Path, context_lines: int) -> List[int] def revision_vs_lines( self, path_in_repo: Path, content: TextDocument, context_lines: int ) -> List[int]: - """For file `path_in_repo`, return changed line numbers from given revision + """Return changed line numbers between the given revision and given text content + + The revision range was provided when this `EditedLinenumsDiffer` object was + created. Content for the given `path_in_repo` at `rev1` of that revision is + taken from the Git repository, and the provided text content is compared against + that historical version. The actual implementation is in the module global `_revision_vs_lines` function so this can be called from both as a method and the module global diff --git a/src/darker/multiline_strings.py b/src/darker/multiline_strings.py new file mode 100644 index 000000000..a74f94161 --- /dev/null +++ b/src/darker/multiline_strings.py @@ -0,0 +1,55 @@ +"""Helper functions for dealing with reformatting multi-line strings""" + +from tokenize import STRING, tokenize +from typing import List, Optional, Sequence, Tuple + +from darker.utils import TextDocument + + +def get_multiline_string_ranges(content: TextDocument) -> List[Tuple[int, int]]: + """Return the line ranges of multi-line strings found in the given Python source + + The returned data is 1-based, end-exclusive. In other words, each item is a 1-based + ``(start, end)`` tuple, ``end`` being the line number following the last line of the + multi-line string. + + :return: Line number ranges of multi-line strings + + """ + readline = (f"{line}\n".encode(content.encoding) for line in content.lines).__next__ + return [ + (t.start[0], t.end[0] + 1) + for t in tokenize(readline) + if t.type == STRING and t.end[0] > t.start[0] + ] + + +def find_overlap( + start: int, end: int, ranges: Sequence[Tuple[int, int]] +) -> Optional[Tuple[int, int]]: + """Return the convex hull of given ranges which overlap with given start and end + + `start..end` must be end-exclusive. + + :param start: The first line of the range to find overlaps for. + :param end: The first line after the range to find overlaps for. + :param ranges: The ranges to scan when looking for an overlap with `(start, end)`. + End-exclusive, and either 0- or 1-based as long as similarly based as + `(start, end)`. + :return: The convex hull, i.e. range from start of first until the end of last range + which overlap with the given `(start, end)` range + + """ + overlap: Optional[Tuple[int, int]] = None + for range_start, range_end in ranges: + if end <= range_start: + break + if start < range_end: + if overlap: + overlap = ( + overlap[0], # pylint: disable=unsubscriptable-object + range_end, + ) + else: + overlap = range_start, range_end + return overlap diff --git a/src/darker/tests/test_diff.py b/src/darker/tests/test_diff.py index e5310f31b..2fbe0b839 100644 --- a/src/darker/tests/test_diff.py +++ b/src/darker/tests/test_diff.py @@ -180,6 +180,7 @@ def test_opcodes_to_chunks(): EXAMPLE_OPCODES = [ + # 0-based, end-exclusive ("replace", 0, 4, 0, 1), ("equal", 4, 6, 1, 3), ("replace", 6, 8, 3, 5), @@ -194,23 +195,64 @@ def test_opcodes_to_chunks(): ("equal", 20, 23, 23, 26), ("insert", 23, 23, 26, 27), ("equal", 23, 24, 27, 28), + ("replace", 24, 34, 28, 38), + ("equal", 34, 35, 38, 39), ] @pytest.mark.kwparametrize( - dict(context_lines=0, expect=[1, 4, 5, 13, 14, 17, 20, 22, 23, 27]), - dict(context_lines=1, expect=[[1, 6], [12, 24], [26, 28]]), - dict(context_lines=2, expect=[[1, 7], [11, 28]]), + dict( + context_lines=0, + multiline_string_ranges=[], + expect=[0, 3, 4, 12, 13, 16, 19, 21, 22, 26, [28, 37]], # 0-based + ), + dict( + context_lines=1, + multiline_string_ranges=[], + expect=[[0, 5], [11, 23], [25, 38]], # 0-based, end-inclusive + ), + dict( + context_lines=2, + multiline_string_ranges=[], + expect=[[0, 6], [10, 38]], # 0-based, end-inclusive + ), + dict( + context_lines=0, + multiline_string_ranges=[ # 0-based, end exclusive + (2, 4), # partial left overlap with (3, 5) + (13, 15), # partial right overlap with (12, 14) + (16, 17), # exact overlap with (16, 17) + (18, 21), # overextending overlap with (19, 20) + (22, 27), # inner overlap with (21, 23) and full overlap with (26, 27) + (28, 30), # full overlap with (28, 38)... + (36, 46), # ...partial left overlap with (28, 38) + ], + expect=[0, [2, 4], [12, 14], 16, [18, 26], [28, 45]], # 0-based, end-inclusive + ), ) -def test_opcodes_to_edit_linenums(context_lines, expect): - edit_linenums = list(opcodes_to_edit_linenums(EXAMPLE_OPCODES, context_lines)) +def test_opcodes_to_edit_linenums(context_lines, multiline_string_ranges, expect): + """`opcodes_to_edit_linenums()` gives correct results""" + edit_linenums = list( + opcodes_to_edit_linenums( + EXAMPLE_OPCODES, + context_lines, + # Convert ranges from 0 to 1 based. The test case is defined using 0-based + # ranges so it's easier to reason about the relation between multi-line + # string ranges and opcode ranges. + [(start + 1, end + 1) for start, end in multiline_string_ranges], + ) + ) + # Normalize expected lines/ranges to 0-based, end-exclusive. expect_ranges = [[n, n] if isinstance(n, int) else n for n in expect] expect_linenums = list(chain(*(range(n[0], n[1] + 1) for n in expect_ranges))) - assert edit_linenums == expect_linenums + # Normalize result to 0-based, end-exclusive before comparison + assert [linenum - 1 for linenum in edit_linenums] == expect_linenums def test_opcodes_to_edit_linenums_empty_opcodes(): - result = list(opcodes_to_edit_linenums([], context_lines=0)) + result = list( + opcodes_to_edit_linenums([], context_lines=0, multiline_string_ranges=[]) + ) assert result == [] diff --git a/src/darker/tests/test_git.py b/src/darker/tests/test_git.py index 189865ad3..610b75b7e 100644 --- a/src/darker/tests/test_git.py +++ b/src/darker/tests/test_git.py @@ -7,6 +7,7 @@ from datetime import datetime, timedelta from pathlib import Path from subprocess import DEVNULL, PIPE, CalledProcessError, check_call # nosec +from textwrap import dedent # nosec from typing import List, Union from unittest.mock import call, patch @@ -856,6 +857,51 @@ def test_edited_linenums_differ_revision_vs_lines(git_repo, context_lines, expec assert linenums == expect +@pytest.mark.kwparametrize( + dict(context_lines=0, expect=[1, 3, 4, 5, 6, 8]), + dict(context_lines=1, expect=[1, 2, 3, 4, 5, 6, 7, 8]), +) +def test_edited_linenums_differ_revision_vs_lines_multiline_strings( + git_repo, context_lines, expect +): + """Tests for EditedLinenumsDiffer.revision_vs_lines() with multi-line strings""" + git_repo.add( + { + "a.py": dedent( + """\ + change\n + keep\n + '''change first,\n + keep second\n + and third,\n + change fourth line of multiline'''\n + keep\n + change\n + """ + ) + }, + commit="Initial commit", + ) + content = TextDocument.from_lines( + [ + "CHANGED", + "keep", + "'''CHANGED FIRST,", + "keep second", + "and third,", + "CHANGED FOURTH LINE OF MULTILINE'''", + "keep", + "CHANGED", + ] + ) + revrange = git.RevisionRange("HEAD", ":WORKTREE:") + differ = git.EditedLinenumsDiffer(git_repo.root, revrange) + + linenums = differ.revision_vs_lines(Path("a.py"), content, context_lines) + + assert linenums == expect + + def test_local_gitconfig_ignored_by_gitrepofixture(tmp_path): """Tests that ~/.gitconfig is ignored when running darker's git tests""" (tmp_path / "HEAD").write_text("ref: refs/heads/main") diff --git a/src/darker/tests/test_main_blacken_single_file.py b/src/darker/tests/test_main_blacken_single_file.py index 62dd37f6e..ab38b1e99 100644 --- a/src/darker/tests/test_main_blacken_single_file.py +++ b/src/darker/tests/test_main_blacken_single_file.py @@ -72,3 +72,51 @@ def test_blacken_single_file_common_ancestor(git_repo): "", "print(a + b) # changed", ) + + +def test_reformat_single_file_docstring(git_repo): + """``_reformat_single_file()`` handles docstrings as one contiguous block""" + initial = dedent( + '''\ + def docstring_func(): + """ + originally unindented + + originally indented + """ + ''' + ) + modified = dedent( + '''\ + def docstring_func(): + """ + originally unindented + + modified and still indented + """ + ''' + ) + expect = dedent( + '''\ + def docstring_func(): + """ + originally unindented + + modified and still indented + """ + ''' + ) + paths = git_repo.add({"a.py": initial}, commit="Initial commit") + paths["a.py"].write_text(modified) + + result = darker.__main__._blacken_single_file( + git_repo.root, + Path("a.py"), + RevisionRange("HEAD", ":WORKTREE:"), + rev2_content=TextDocument.from_str(modified), + rev2_isorted=TextDocument.from_str(modified), + enable_isort=False, + black_config={}, + ) + + assert result.lines == tuple(expect.splitlines()) diff --git a/src/darker/tests/test_multiline_strings.py b/src/darker/tests/test_multiline_strings.py new file mode 100644 index 000000000..f8ec35c70 --- /dev/null +++ b/src/darker/tests/test_multiline_strings.py @@ -0,0 +1,126 @@ +"""Tests for `darker.multiline_strings`""" + +import pytest + +from darker import multiline_strings +from darker.utils import TextDocument + + +def test_get_multiline_string_ranges(): + """`get_multiline_string_ranges()` identifies multi-line strings correctly""" + content = TextDocument.from_lines( + line + for _, line in [ + # Dummy line numbers added to ease debugging of failures + (1, "'single-quoted string'"), + (2, "f'single-quoted f-string'"), + (3, "r'single-quoted raw string'"), + (4, "rf'single-quoted raw f-string'"), + (5, "b'single-quoted bytestring'"), + (6, "rb'single-quoted raw bytestring'"), + (7, "'''triple-single-quoted one-line string'''"), + (8, "f'''triple-single-quoted one-line f-string'''"), + (9, "r'''triple-single-quoted one-line raw string'''"), + (10, "rf'''triple-single-quoted one-line raw f-string'''"), + (11, "b'''triple-single-quoted one-line bytestring'''"), + (12, "rb'''triple-single-quoted one-line raw bytestring'''"), + (13, "'''triple-single-quoted"), + (14, " two-line string'''"), + (15, "f'''triple-single-quoted"), + (16, " two-line f-string'''"), + (17, "r'''triple-single-quoted"), + (18, " two-line raw string'''"), + (19, "rf'''triple-single-quoted"), + (20, " two-line raw f-string'''"), + (21, "b'''triple-single-quoted"), + (22, " two-line bytestring'''"), + (23, "rb'''triple-single-quoted"), + (24, " two-line raw bytestring'''"), + (25, '"double-quoted string"'), + (26, 'f"double-quoted f-string"'), + (27, 'r"double-quoted raw string"'), + (28, 'rf"double-quoted raw f-string"'), + (29, 'b"double-quoted bytestring"'), + (30, 'rb"double-quoted raw bytestring"'), + (31, '"""triple-double-quoted one-line string"""'), + (32, 'f"""triple-double-quoted one-line f-string"""'), + (33, 'r"""triple-double-quoted one-line raw string"""'), + (34, 'rf"""triple-double-quoted one-line raw f-string"""'), + (35, 'b"""triple-double-quoted one-line bytestring"""'), + (36, 'rb"""triple-double-quoted one-line raw bytestring"""'), + (37, '"""triple-double-quoted'), + (38, ' two-line string"""'), + (39, 'f"""triple-double-quoted'), + (40, ' two-line f-string"""'), + (41, 'r"""triple-double-quoted'), + (42, ' two-line raw string"""'), + (43, 'rf"""triple-double-quoted'), + (44, ' two-line raw f-string"""'), + (45, 'b"""triple-double-quoted'), + (46, ' two-line bytestring"""'), + (47, 'rb"""triple-double-quoted'), + (48, ' two-line raw bytestring"""'), + (49, '"""triple-'), + (50, " double-"), + (51, " quoted"), + (52, " six-"), + (53, " line"), + (54, ' string"""'), + ] + ) + + result = multiline_strings.get_multiline_string_ranges(content) + + assert result == [ + # 1-based, end-exclusive + (13, 15), + (15, 17), + (17, 19), + (19, 21), + (21, 23), + (23, 25), + (37, 39), + (39, 41), + (41, 43), + (43, 45), + (45, 47), + (47, 49), + (49, 55), + ] + + +# End-exclusive +TEST_RANGES = [(2, 2), (5, 6), (9, 11), (14, 17)] + + +@pytest.mark.kwparametrize( + # `(start, end)` and `ranges` are end-exclusive + dict(start=0, end=0, ranges=[], expect=None), + dict(start=0, end=42, ranges=[], expect=None), + dict(start=0, end=0, ranges=TEST_RANGES, expect=None), + dict(start=1, end=2, ranges=TEST_RANGES, expect=None), + dict(start=2, end=2, ranges=TEST_RANGES, expect=None), + dict(start=1, end=3, ranges=TEST_RANGES, expect=(2, 2)), + dict(start=2, end=3, ranges=TEST_RANGES, expect=None), + dict(start=3, end=3, ranges=TEST_RANGES, expect=None), + dict(start=4, end=5, ranges=TEST_RANGES, expect=None), + dict(start=5, end=5, ranges=TEST_RANGES, expect=None), + dict(start=4, end=6, ranges=TEST_RANGES, expect=(5, 6)), + dict(start=5, end=6, ranges=TEST_RANGES, expect=(5, 6)), + dict(start=6, end=6, ranges=TEST_RANGES, expect=None), + dict(start=4, end=7, ranges=TEST_RANGES, expect=(5, 6)), + dict(start=5, end=7, ranges=TEST_RANGES, expect=(5, 6)), + dict(start=6, end=7, ranges=TEST_RANGES, expect=None), + dict(start=10, end=10, ranges=TEST_RANGES, expect=(9, 11)), + dict(start=10, end=11, ranges=TEST_RANGES, expect=(9, 11)), + dict(start=10, end=12, ranges=TEST_RANGES, expect=(9, 11)), + dict(start=11, end=11, ranges=TEST_RANGES, expect=None), + dict(start=11, end=12, ranges=TEST_RANGES, expect=None), + dict(start=12, end=12, ranges=TEST_RANGES, expect=None), + dict(start=10, end=19, ranges=TEST_RANGES, expect=(9, 17)), +) +def test_find_overlap(start, end, ranges, expect): + """`find_overlap()` finds the enclosing range for overlapping ranges""" + result = multiline_strings.find_overlap(start, end, ranges) + + assert result == expect