Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a --diff option to match black behavior. #4

Merged
merged 22 commits into from
Jul 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

- Feature: Add support for black config
- Feature: Add support for ``-l``/``--line-length`` and ``-S``/``--skip-string-normalization``
- Feature: ``--diff`` outputs a diff for each file on standard output


0.2.0 / 2020-03-11
------------------
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
- Alexander Tishin (@Mystic-Mirage)
- Antti Kaihola (@akaihola)
- Correy Lim (@CorreyL)
- Matthias Bussonnier (@Carreau)
24 changes: 24 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,30 @@ PyCharm/IntelliJ IDEA

__ https://plugins.jetbrains.com/plugin/7177-file-watchers

Visual Studio Code
------------------

1. Install ``darker``::

$ pip install darker

2. Locate the path of your ``darker`` executable.

On macOS / Linux / BSD::

$ which darker
/usr/local/bin/darker # possible location

On Windows::

$ where darker
%LocalAppData%\Programs\Python\Python36-32\Scripts\darker.exe # possible location

3. Add these configuration options::

"python.formatting.provider": "black",
"python.formatting.blackPath": "<install_location_from_step_2>"


How does it work?
=================
Expand Down
102 changes: 69 additions & 33 deletions src/darker/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@

import logging
import sys
from difflib import unified_diff
from pathlib import Path
from typing import Dict, Iterable, List, Set, Union
from typing import Dict, Iterable, List, Union

from darker.black_diff import diff_and_get_opcodes, opcodes_to_chunks, run_black
from darker.black_diff import run_black
from darker.chooser import choose_lines
from darker.command_line import ISORT_INSTRUCTION, parse_command_line
from darker.git_diff import (
GitDiffParseError,
get_edit_linenums,
git_diff,
git_diff_name_only,
from darker.diff import (
diff_and_get_opcodes,
opcodes_to_chunks,
opcodes_to_edit_linenums,
)
from darker.git import git_diff_name_only, git_get_unmodified_content
from darker.import_sorting import SortImports, apply_isort
from darker.utils import get_common_root, joinlines
from darker.verification import NotEquivalentError, verify_ast_unchanged
Expand All @@ -26,12 +27,15 @@


def format_edited_parts(
srcs: Iterable[Path], isort: bool, black_args: Dict[str, Union[bool, int]]
srcs: Iterable[Path],
isort: bool,
black_args: Dict[str, Union[bool, int]],
print_diff: bool,
) -> None:
"""Black (and optional isort) formatting for chunks with edits since the last commit

1. run isort on each edited file
2. do a ``git diff -U0 <path> ...`` for all file & dir paths on the command line
2. diff HEAD and worktree for all file & dir paths on the command line
3. extract line numbers in each edited to-file for changed lines
4. run black on the contents of each edited to-file
5. get a diff between the edited to-file and the reformatted content
Expand All @@ -48,31 +52,46 @@ def format_edited_parts(
:param srcs: Directories and files to re-format
:param isort: ``True`` to also run ``isort`` first on each changed file
:param black_args: Command-line arguments to send to ``black.FileMode``
:param print_diff: ``True`` to output diffs instead of modifying source files

"""
remaining_srcs: Set[Path] = set(srcs)
git_root = get_common_root(srcs)
changed_files = git_diff_name_only(srcs, git_root)
head_srcs = {
src: git_get_unmodified_content(src, git_root) for src in changed_files
}
worktree_srcs = {src: (git_root / src).read_text() for src in changed_files}

# 1. run isort
if isort:
changed_files = git_diff_name_only(remaining_srcs, git_root)
apply_isort(changed_files)

for context_lines in range(MAX_CONTEXT_LINES + 1):

# 2. do the git diff
logger.debug("Looking at %s", ", ".join(str(s) for s in remaining_srcs))
logger.debug("Git root: %s", git_root)
git_diff_result = git_diff(remaining_srcs, git_root, context_lines)

# 3. extract changed line numbers for each to-file
remaining_srcs = set()
for src_relative, edited_linenums in get_edit_linenums(git_diff_result):
edited_srcs = {
src: apply_isort(edited_content)
for src, edited_content in worktree_srcs.items()
}
else:
edited_srcs = worktree_srcs

for src_relative, edited_content in edited_srcs.items():
for context_lines in range(MAX_CONTEXT_LINES + 1):
src = git_root / src_relative
if not edited_linenums:
continue
edited = edited_content.splitlines()
head_lines = head_srcs[src_relative]

# 2. diff HEAD and worktree for all file & dir paths on the command line
edited_opcodes = diff_and_get_opcodes(head_lines, edited)

# 3. extract line numbers in each edited to-file for changed lines
edited_linenums = list(opcodes_to_edit_linenums(edited_opcodes))
if (
isort
and not edited_linenums
and edited_content == worktree_srcs[src_relative]
):
logger.debug("No changes in %s after isort", src)
break

# 4. run black
edited, formatted = run_black(src, black_args)
formatted = run_black(src, edited_content, black_args)
logger.debug("Read %s lines from edited file %s", len(edited), src)
logger.debug("Black reformat resulted in %s lines", len(formatted))

Expand All @@ -96,7 +115,9 @@ def format_edited_parts(
len(chosen_lines),
)
try:
verify_ast_unchanged(edited, result_str, black_chunks, edited_linenums)
verify_ast_unchanged(
edited_content, result_str, black_chunks, edited_linenums
)
except NotEquivalentError:
# Diff produced misaligned chunks which couldn't be reconstructed into
# a partially re-formatted Python file which produces an identical AST.
Expand All @@ -109,14 +130,29 @@ def format_edited_parts(
"Trying again with %s lines of context for `git diff -U`",
context_lines + 1,
)
remaining_srcs.add(src)
continue
else:
# 10. A re-formatted Python file which produces an identical AST was
# created successfully - write an updated file
logger.info("Writing %s bytes into %s", len(result_str), src)
src.write_text(result_str)
if not remaining_srcs:
break
# or print the diff
if print_diff:
difflines = list(
unified_diff(
worktree_srcs[src_relative].splitlines(),
chosen_lines,
src.as_posix(),
src.as_posix(),
)
)
if len(difflines) > 2:
h1, h2, *rest = difflines
print(h1, end="")
print(h2, end="")
print("\n".join(rest))
else:
logger.info("Writing %s bytes into %s", len(result_str), src)
src.write_text(result_str)
break


def main(argv: List[str] = None) -> None:
Expand Down Expand Up @@ -149,7 +185,7 @@ def main(argv: List[str] = None) -> None:
black_args["skip_string_normalization"] = args.skip_string_normalization

paths = {Path(p) for p in args.src}
format_edited_parts(paths, args.isort, black_args)
format_edited_parts(paths, args.isort, black_args, args.diff)


if __name__ == "__main__":
Expand Down
136 changes: 22 additions & 114 deletions src/darker/black_diff.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,41 @@
"""Turn Python code into chunks of original and re-formatted code

The functions in this module implement three steps
for converting a file with Python source code into a list of chunks.
From these chunks, the same file can be reconstructed
while choosing whether each chunk should be taken from the original untouched file
or from the version reformatted with Black.
"""Re-format Python source code using Black

In examples below, a simple two-line snippet is used.
The first line will be reformatted by Black, and the second left intact::

>>> from unittest.mock import Mock
>>> src = Mock()
>>> src.read_text.return_value = '''\\
>>> src_content = '''\\
... for i in range(5): print(i)
... print("done")
... '''

First, :func:`run_black` uses Black to reformat the contents of a given file.
Original and reformatted lines are returned e.g.::
Reformatted lines are returned e.g.::

>>> src_lines, dst_lines = run_black(src, black_args={})
>>> src_lines
['for i in range(5): print(i)',
'print("done")']
>>> dst_lines = run_black(src, src_content, black_args={})
>>> dst_lines
['for i in range(5):',
' print(i)',
'print("done")']

The output of :func:`run_black` should then be fed into :func:`diff_and_get_opcodes`.
It divides a diff between the original and reformatted content
into alternating chunks of
intact (represented by the 'equal' tag) and
modified ('delete', 'replace' or 'insert' tag) lines.
Each chunk is an opcode represented by the tag and the corresponding 0-based line ranges
in the original and reformatted content, e.g.::

>>> opcodes = diff_and_get_opcodes(src_lines, dst_lines)
>>> len(opcodes)
2
>>> opcodes[0] # split 'for' loop into two lines
('replace', 0, 1, 0, 2)
>>> opcodes[1] # keep 'print("done")' as such
('equal', 1, 2, 2, 3)

Finally, :func:`opcodes_to_chunks` picks the lines
from original and reformatted content for each opcode.
It combines line content with the 1-based line offset in the original content, e.g.::

>>> chunks = list(opcodes_to_chunks(opcodes, src_lines, dst_lines))
>>> len(chunks)
2
>>> chunks[0] # (<offset in orig content>, <original lines>, <reformatted lines>)
(1,
['for i in range(5): print(i)'],
['for i in range(5):',
' print(i)'])
>>> chunks[1]
(2,
['print("done")'],
['print("done")'])

By concatenating the second items in these tuples, i.e. original lines,
the original file can be reconstructed.

By concatenating the third items, i.e. reformatted lines,
the complete output from Black can be reconstructed.

By concatenating and choosing either the second or third item,
a mixed result with only selected regions reformatted can be reconstructed.
See :mod:`darker.diff` and :mod:`darker.chooser`
for how this result is further processed with:

- :func:`~darker.diff.diff_and_get_opcodes`
to get a diff of the reformatting
- :func:`~darker.diff.opcodes_to_chunks`
to split the diff into chunks of original and reformatted content
- :func:`~darker.chooser.choose_lines`
to reconstruct the source code from original and reformatted chunks
based on whether reformats touch user-edited lines

"""

import logging
from difflib import SequenceMatcher
from functools import lru_cache
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union

from black import FileMode, format_str, read_pyproject_toml
from click import Command, Context, Option
Expand Down Expand Up @@ -104,12 +63,14 @@ def read_black_config(src: Path, value: Optional[str]) -> Dict[str, Union[bool,


def run_black(
src: Path, black_args: Dict[str, Union[bool, int]]
) -> Tuple[List[str], List[str]]:
"""Run the black formatter for the contents of the given Python file
src: Path, src_contents: str, black_args: Dict[str, Union[bool, int]]
) -> List[str]:
"""Run the black formatter for the Python source code given as a string

Return the lines of the re-formatted content.

:param src: The originating file path for the source code
:param src_contents: The source code as a string
:param black_args: Command-line arguments to send to ``black.FileMode``

"""
Expand All @@ -133,59 +94,6 @@ def run_black(
# from the command line arguments
mode = FileMode(**effective_args)

src_contents = src.read_text()
dst_contents = format_str(src_contents, mode=mode)
return src_contents.splitlines(), dst_contents.splitlines()


def diff_and_get_opcodes(
src_lines: List[str], dst_lines: List[str]
) -> List[Tuple[str, int, int, int, int]]:
"""Return opcodes and line numbers for chunks in the diff of two lists of strings

The opcodes are 5-tuples for each chunk with

- the tag of the operation ('equal', 'delete', 'replace' or 'insert')
- the number of the first line in the chunk in the from-file
- the number of the last line in the chunk in the from-file
- the number of the first line in the chunk in the to-file
- the number of the last line in the chunk in the to-file

Line numbers are zero based.

"""
matcher = SequenceMatcher(None, src_lines, dst_lines, autojunk=False)
opcodes = matcher.get_opcodes()
logger.debug(
"Diff between edited and reformatted has %s opcode%s",
len(opcodes),
"s" if len(opcodes) > 1 else "",
)
return opcodes


def opcodes_to_chunks(
opcodes: List[Tuple[str, int, int, int, int]],
src_lines: List[str],
dst_lines: List[str],
) -> Generator[Tuple[int, List[str], List[str]], None, None]:
"""Convert each diff opcode to a line number and original plus modified lines

Each chunk is a 3-tuple with

- the 1-based number of the first line in the chunk in the from-file
- the original lines of the chunk in the from-file
- the modified lines of the chunk in the to-file

Based on this, the patch can be constructed by choosing either original or modified
lines for each chunk and concatenating them together.

"""
# Make sure every other opcode is an 'equal' tag
assert all(
(tag1 == "equal") != (tag2 == "equal")
for (tag1, _, _, _, _), (tag2, _, _, _, _) in zip(opcodes[:-1], opcodes[1:])
), opcodes

for tag, i1, i2, j1, j2 in opcodes:
yield i1 + 1, src_lines[i1:i2], dst_lines[j1:j2]
dst_lines: List[str] = dst_contents.splitlines()
return dst_lines
5 changes: 5 additions & 0 deletions src/darker/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def parse_command_line(argv: List[str]) -> Namespace:
isort_help = ["Also sort imports using the `isort` package"]
if not isort:
isort_help.append(f". {ISORT_INSTRUCTION} to enable usage of this option.")
parser.add_argument(
"--diff",
action="store_true",
help="Don't write the files back, just output a diff for each file on stdout",
)
parser.add_argument(
"-i", "--isort", action="store_true", help="".join(isort_help),
)
Expand Down
Loading