Skip to content

Commit

Permalink
Merge pull request #300 from akaihola/docstring-indent-loop
Browse files Browse the repository at this point in the history
Format docstrings as complete chunks
  • Loading branch information
akaihola authored Apr 5, 2022
2 parents 0446a91 + 0f9a8a2 commit 1e742d7
Show file tree
Hide file tree
Showing 8 changed files with 379 additions and 18 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ Fixed
the virtualenv.
- Allow more time to pass when checking file modification times in a unit test.
Windows tests on GitHub are sometimes really slow.
- Multiline strings are now always reformatted completely even if just a part
was modified by the user and reformatted by Black. This prevents the
"back-and-forth indent" symptom.


1.4.1_ - 2022-02-17
Expand Down
43 changes: 35 additions & 8 deletions src/darker/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@

import logging
from difflib import SequenceMatcher
from typing import Generator, List, Tuple
from typing import Generator, List, Sequence, Tuple

from darker.multiline_strings import find_overlap
from darker.utils import DiffChunk, TextDocument

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,25 +110,51 @@ def _validate_opcodes(opcodes: List[Tuple[str, int, int, int, int]]) -> None:


def opcodes_to_edit_linenums(
opcodes: List[Tuple[str, int, int, int, int]], context_lines: int
opcodes: List[Tuple[str, int, int, int, int]],
context_lines: int,
multiline_string_ranges: Sequence[Tuple[int, int]],
) -> Generator[int, None, None]:
"""Convert diff opcodes to line numbers of edited lines in the destination file
:param opcodes: The diff opcodes to convert
On top of a straight mapping from line ranges to individual line numbers, this
function extends each diff opcode line range
- upwards and downwards for as many lines as determined by ``context_lines``
- to make sure the range covers any multiline strings completely
:param opcodes: The diff opcodes to convert. 0-based, end-exclusive.
:param context_lines: The number of lines before and after an edited line to mark
edited as well
:param multiline_string_ranges: Line ranges of multi-line strings. 1-based,
end-exclusive.
:return: Generates a list of integer 1-based line numbers
"""
if not opcodes:
return
_validate_opcodes(opcodes)

# Calculate the last line number beyond which we won't extend with extra context
# lines
_tag, _i1, _i2, _j1, last_opcode_end = opcodes[-1]
_, last_multiline_string_end = (
multiline_string_ranges[-1] if multiline_string_ranges else (None, 0)
)
lastline = max(last_opcode_end + 1, last_multiline_string_end)

prev_chunk_end = 1
_tag, _i1, _i2, _j1, end = opcodes[-1]
for tag, _i1, _i2, j1, j2 in opcodes:
if tag != "equal":
chunk_end = min(j2 + 1 + context_lines, end + 1)
yield from range(max(j1 + 1 - context_lines, prev_chunk_end), chunk_end)
prev_chunk_end = chunk_end
if tag == "equal":
continue
chunk_start = j1 + 1 - context_lines
chunk_end = j2 + 1 + context_lines
multiline_string_range = find_overlap(
chunk_start, chunk_end, multiline_string_ranges
)
if multiline_string_range:
chunk_start = min(chunk_start, multiline_string_range[0])
chunk_end = max(chunk_end, multiline_string_range[1])
yield from range(max(chunk_start, prev_chunk_end), min(chunk_end, lastline))
prev_chunk_end = chunk_end


def opcodes_to_chunks(
Expand Down
20 changes: 17 additions & 3 deletions src/darker/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, overload

from darker.diff import diff_and_get_opcodes, opcodes_to_edit_linenums
from darker.multiline_strings import get_multiline_string_ranges
from darker.utils import GIT_DATEFORMAT, TextDocument

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -377,7 +378,12 @@ def git_get_modified_python_files(
def _revision_vs_lines(
root: Path, path_in_repo: Path, rev1: str, content: TextDocument, context_lines: int
) -> List[int]:
"""For file `path_in_repo`, return changed line numbers from given revision
"""Return changed line numbers between the given revision and given text content
The revision range was provided when this `EditedLinenumsDiffer` object was
created. Content for the given `path_in_repo` at `rev1` of that revision is
taken from the Git repository, and the provided text content is compared against
that historical version.
The actual implementation is here instead of in
`EditedLinenumsDiffer._revision_vs_lines` so it is accessibe both as a method and as
Expand All @@ -393,7 +399,10 @@ def _revision_vs_lines(
"""
old = git_get_content_at_revision(path_in_repo, rev1, root)
edited_opcodes = diff_and_get_opcodes(old, content)
return list(opcodes_to_edit_linenums(edited_opcodes, context_lines))
multiline_string_ranges = get_multiline_string_ranges(content)
return list(
opcodes_to_edit_linenums(edited_opcodes, context_lines, multiline_string_ranges)
)


@lru_cache(maxsize=1)
Expand Down Expand Up @@ -455,7 +464,12 @@ def compare_revisions(self, path_in_repo: Path, context_lines: int) -> List[int]
def revision_vs_lines(
self, path_in_repo: Path, content: TextDocument, context_lines: int
) -> List[int]:
"""For file `path_in_repo`, return changed line numbers from given revision
"""Return changed line numbers between the given revision and given text content
The revision range was provided when this `EditedLinenumsDiffer` object was
created. Content for the given `path_in_repo` at `rev1` of that revision is
taken from the Git repository, and the provided text content is compared against
that historical version.
The actual implementation is in the module global `_revision_vs_lines` function
so this can be called from both as a method and the module global
Expand Down
55 changes: 55 additions & 0 deletions src/darker/multiline_strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Helper functions for dealing with reformatting multi-line strings"""

from tokenize import STRING, tokenize
from typing import List, Optional, Sequence, Tuple

from darker.utils import TextDocument


def get_multiline_string_ranges(content: TextDocument) -> List[Tuple[int, int]]:
"""Return the line ranges of multi-line strings found in the given Python source
The returned data is 1-based, end-exclusive. In other words, each item is a 1-based
``(start, end)`` tuple, ``end`` being the line number following the last line of the
multi-line string.
:return: Line number ranges of multi-line strings
"""
readline = (f"{line}\n".encode(content.encoding) for line in content.lines).__next__
return [
(t.start[0], t.end[0] + 1)
for t in tokenize(readline)
if t.type == STRING and t.end[0] > t.start[0]
]


def find_overlap(
start: int, end: int, ranges: Sequence[Tuple[int, int]]
) -> Optional[Tuple[int, int]]:
"""Return the convex hull of given ranges which overlap with given start and end
`start..end` must be end-exclusive.
:param start: The first line of the range to find overlaps for.
:param end: The first line after the range to find overlaps for.
:param ranges: The ranges to scan when looking for an overlap with `(start, end)`.
End-exclusive, and either 0- or 1-based as long as similarly based as
`(start, end)`.
:return: The convex hull, i.e. range from start of first until the end of last range
which overlap with the given `(start, end)` range
"""
overlap: Optional[Tuple[int, int]] = None
for range_start, range_end in ranges:
if end <= range_start:
break
if start < range_end:
if overlap:
overlap = (
overlap[0], # pylint: disable=unsubscriptable-object
range_end,
)
else:
overlap = range_start, range_end
return overlap
56 changes: 49 additions & 7 deletions src/darker/tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def test_opcodes_to_chunks():


EXAMPLE_OPCODES = [
# 0-based, end-exclusive
("replace", 0, 4, 0, 1),
("equal", 4, 6, 1, 3),
("replace", 6, 8, 3, 5),
Expand All @@ -194,23 +195,64 @@ def test_opcodes_to_chunks():
("equal", 20, 23, 23, 26),
("insert", 23, 23, 26, 27),
("equal", 23, 24, 27, 28),
("replace", 24, 34, 28, 38),
("equal", 34, 35, 38, 39),
]


@pytest.mark.kwparametrize(
dict(context_lines=0, expect=[1, 4, 5, 13, 14, 17, 20, 22, 23, 27]),
dict(context_lines=1, expect=[[1, 6], [12, 24], [26, 28]]),
dict(context_lines=2, expect=[[1, 7], [11, 28]]),
dict(
context_lines=0,
multiline_string_ranges=[],
expect=[0, 3, 4, 12, 13, 16, 19, 21, 22, 26, [28, 37]], # 0-based
),
dict(
context_lines=1,
multiline_string_ranges=[],
expect=[[0, 5], [11, 23], [25, 38]], # 0-based, end-inclusive
),
dict(
context_lines=2,
multiline_string_ranges=[],
expect=[[0, 6], [10, 38]], # 0-based, end-inclusive
),
dict(
context_lines=0,
multiline_string_ranges=[ # 0-based, end exclusive
(2, 4), # partial left overlap with (3, 5)
(13, 15), # partial right overlap with (12, 14)
(16, 17), # exact overlap with (16, 17)
(18, 21), # overextending overlap with (19, 20)
(22, 27), # inner overlap with (21, 23) and full overlap with (26, 27)
(28, 30), # full overlap with (28, 38)...
(36, 46), # ...partial left overlap with (28, 38)
],
expect=[0, [2, 4], [12, 14], 16, [18, 26], [28, 45]], # 0-based, end-inclusive
),
)
def test_opcodes_to_edit_linenums(context_lines, expect):
edit_linenums = list(opcodes_to_edit_linenums(EXAMPLE_OPCODES, context_lines))
def test_opcodes_to_edit_linenums(context_lines, multiline_string_ranges, expect):
"""`opcodes_to_edit_linenums()` gives correct results"""
edit_linenums = list(
opcodes_to_edit_linenums(
EXAMPLE_OPCODES,
context_lines,
# Convert ranges from 0 to 1 based. The test case is defined using 0-based
# ranges so it's easier to reason about the relation between multi-line
# string ranges and opcode ranges.
[(start + 1, end + 1) for start, end in multiline_string_ranges],
)
)
# Normalize expected lines/ranges to 0-based, end-exclusive.
expect_ranges = [[n, n] if isinstance(n, int) else n for n in expect]
expect_linenums = list(chain(*(range(n[0], n[1] + 1) for n in expect_ranges)))

assert edit_linenums == expect_linenums
# Normalize result to 0-based, end-exclusive before comparison
assert [linenum - 1 for linenum in edit_linenums] == expect_linenums


def test_opcodes_to_edit_linenums_empty_opcodes():
result = list(opcodes_to_edit_linenums([], context_lines=0))
result = list(
opcodes_to_edit_linenums([], context_lines=0, multiline_string_ranges=[])
)

assert result == []
46 changes: 46 additions & 0 deletions src/darker/tests/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from datetime import datetime, timedelta
from pathlib import Path
from subprocess import DEVNULL, PIPE, CalledProcessError, check_call # nosec
from textwrap import dedent # nosec
from typing import List, Union
from unittest.mock import call, patch

Expand Down Expand Up @@ -856,6 +857,51 @@ def test_edited_linenums_differ_revision_vs_lines(git_repo, context_lines, expec
assert linenums == expect


@pytest.mark.kwparametrize(
dict(context_lines=0, expect=[1, 3, 4, 5, 6, 8]),
dict(context_lines=1, expect=[1, 2, 3, 4, 5, 6, 7, 8]),
)
def test_edited_linenums_differ_revision_vs_lines_multiline_strings(
git_repo, context_lines, expect
):
"""Tests for EditedLinenumsDiffer.revision_vs_lines() with multi-line strings"""
git_repo.add(
{
"a.py": dedent(
"""\
change\n
keep\n
'''change first,\n
keep second\n
and third,\n
change fourth line of multiline'''\n
keep\n
change\n
"""
)
},
commit="Initial commit",
)
content = TextDocument.from_lines(
[
"CHANGED",
"keep",
"'''CHANGED FIRST,",
"keep second",
"and third,",
"CHANGED FOURTH LINE OF MULTILINE'''",
"keep",
"CHANGED",
]
)
revrange = git.RevisionRange("HEAD", ":WORKTREE:")
differ = git.EditedLinenumsDiffer(git_repo.root, revrange)

linenums = differ.revision_vs_lines(Path("a.py"), content, context_lines)

assert linenums == expect


def test_local_gitconfig_ignored_by_gitrepofixture(tmp_path):
"""Tests that ~/.gitconfig is ignored when running darker's git tests"""
(tmp_path / "HEAD").write_text("ref: refs/heads/main")
Expand Down
48 changes: 48 additions & 0 deletions src/darker/tests/test_main_blacken_single_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,51 @@ def test_blacken_single_file_common_ancestor(git_repo):
"",
"print(a + b) # changed",
)


def test_reformat_single_file_docstring(git_repo):
"""``_reformat_single_file()`` handles docstrings as one contiguous block"""
initial = dedent(
'''\
def docstring_func():
"""
originally unindented
originally indented
"""
'''
)
modified = dedent(
'''\
def docstring_func():
"""
originally unindented
modified and still indented
"""
'''
)
expect = dedent(
'''\
def docstring_func():
"""
originally unindented
modified and still indented
"""
'''
)
paths = git_repo.add({"a.py": initial}, commit="Initial commit")
paths["a.py"].write_text(modified)

result = darker.__main__._blacken_single_file(
git_repo.root,
Path("a.py"),
RevisionRange("HEAD", ":WORKTREE:"),
rev2_content=TextDocument.from_str(modified),
rev2_isorted=TextDocument.from_str(modified),
enable_isort=False,
black_config={},
)

assert result.lines == tuple(expect.splitlines())
Loading

0 comments on commit 1e742d7

Please sign in to comment.