Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement annotate. Fixes #245 #536

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@
* Repo is now a context manager, so that it can be easily
closed using a ``with`` statement. (Søren Løvborg)

IMPROVEMENTS

* Add naive annotate implementation in ``dulwich.annotate``.
It works, but performance needs work. (Jelmer Vernooij)

TEST FIXES

* Only run worktree list compat tests against git 2.7.0,
Expand Down Expand Up @@ -199,7 +204,7 @@
(Mark Mikofski)

* Add ``Blob.splitlines`` method.
(Jelmer Vernooij)
(Jelmer Vernooij)

BUG FIXES

Expand All @@ -218,9 +223,9 @@
BUG FIXES

* Allow missing trailing LF when reading service name from
HTTP servers. (Jelmer Vernooij, Andrew Shadura, #442)
HTTP servers. (Jelmer Vernooij, Andrew Shadura, #442)

* Fix dulwich.porcelain.pull() on Python3. (Jelmer Vernooij, #451)
* Fix dulwich.porcelain.pull() on Python3. (Jelmer Vernooij, #451)

* Properly pull in tags during dulwich.porcelain.clone.
(Jelmer Vernooij, #408)
Expand All @@ -233,7 +238,7 @@

IMPROVEMENTS

* Add ``dulwich.porcelain.ls_tree`` implementation. (Jelmer Vernooij)
* Add ``dulwich.porcelain.ls_tree`` implementation. (Jelmer Vernooij)

0.14.1 2016-07-05

Expand Down
27 changes: 26 additions & 1 deletion bin/dulwich
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import sys
from getopt import getopt
import optparse
import signal
import time

def signal_int(signal, frame):
    """Raise ``SystemExit`` with exit status 1.

    Both arguments are ignored.

    NOTE(review): presumably installed as the SIGINT handler; the
    registration is not visible in this part of the file.
    """
    raise SystemExit(1)
Expand All @@ -44,7 +45,10 @@ from dulwich.client import get_transport_and_path
from dulwich.errors import ApplyDeltaError
from dulwich.index import Index
from dulwich.pack import Pack, sha_to_hex
from dulwich.patch import write_tree_diff
from dulwich.patch import (
shortid,
write_tree_diff,
)
from dulwich.repo import Repo


Expand Down Expand Up @@ -168,6 +172,26 @@ class cmd_dump_pack(Command):
print("\t%s: Unable to apply delta: %r" % (name, e))


def format_annotate_line(i, commit, entry, line, time_format=None):
    """Format one annotated line for display.

    :param i: Line number to show
    :param commit: Commit the line is attributed to; its ``id``, ``author``
        and ``author_time`` attributes are read
    :param entry: Tree entry of the annotated file; its ``path`` is shown
    :param line: Contents of the line
    :param time_format: ``time.strftime`` format for the author time
        (defaults to ``"%Y-%m-%d %H:%M:%S %z"``)
    :return: The formatted line as a native string
    """
    def to_text(value):
        # Formatting a bytestring with "%s" on Python 3 renders it as
        # "b'...'", so decode real bytes first. On Python 2 ``bytes`` is
        # ``str`` and the value is passed through untouched.
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    if time_format is None:
        time_format = "%Y-%m-%d %H:%M:%S %z"
    # NOTE(review): the author time is rendered via gmtime(), i.e. in UTC;
    # the commit's own timezone offset is not consulted here.
    time_str = time.strftime(time_format, time.gmtime(commit.author_time))
    # Keep only the name part of a "Name <email>" identity.
    author_str = to_text(commit.author).partition(" <")[0]
    return "%s\t%s\t(%s\t%20s\t%d)%s" % (
        to_text(commit.id[:8]), to_text(entry.path), author_str, time_str,
        i, to_text(line))


class cmd_annotate(Command):
    """Print each line of a file together with its annotation."""

    def run(self, args):
        """Annotate the path given as first argument and write the result.

        The repository in the current directory (".") is used. Every
        annotated line is formatted with ``format_annotate_line`` and
        written to standard output, with line numbers starting at 1.

        :param args: Command-line arguments; the first positional argument
            is the path to annotate
        """
        opts, args = getopt(args, "", [])
        opts = dict(opts)

        path = args.pop(0)
        annotated = porcelain.annotate(".", path)
        for number, ((commit, entry), line) in enumerate(annotated, 1):
            formatted = format_annotate_line(number, commit, entry, line)
            sys.stdout.write(formatted)


class cmd_dump_index(Command):

def run(self, args):
Expand Down Expand Up @@ -540,6 +564,7 @@ For a list of supported commands, see 'dulwich help -a'.

commands = {
"add": cmd_add,
"annotate": cmd_annotate,
"archive": cmd_archive,
"check-ignore": cmd_check_ignore,
"clone": cmd_clone,
Expand Down
93 changes: 93 additions & 0 deletions dulwich/annotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# annotate.py -- Annotate files with last changed revision
# Copyright (C) 2015 Jelmer Vernooij <[email protected]>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

"""Annotate file contents indicating when they were last changed.

Annotated lines are represented as ((commit, entry), line) tuples: the
revision that last modified the line, followed by the line's contents.

Please note that this is a very naive annotate implementation. It works,
but its speed could be improved - in particular because it uses
Python's difflib.
"""

import difflib

from dulwich.walk import (
ORDER_DATE,
Walker,
)


# Walk over ancestry graph breadth-first
# When checking each revision, find lines that according to
# difflib.SequenceMatcher are common between versions.
# Any lines that are not in common were introduced by the newer revision.
# If there were no lines kept from the older version, stop going deeper in the
# graph.

def update_lines(annotated_lines, new_history_data, new_blob):
    """Carry annotations forward onto the lines of a newer blob.

    :param annotated_lines: List of (history data, line) tuples describing
        the previous version
    :param new_history_data: History data to attach to every line that is
        not carried over unchanged from the previous version
    :param new_blob: Object whose ``splitlines()`` yields the new lines
    :return: List of (history data, line) tuples, one per line of new_blob
    :raise RuntimeError: If the diff reports an unknown opcode tag
    """
    current = new_blob.splitlines()
    previous = [text for (_, text) in annotated_lines]
    opcodes = difflib.SequenceMatcher(a=previous, b=current).get_opcodes()

    merged = []
    for op, old_start, old_end, new_start, new_end in opcodes:
        if op == 'delete':
            # Lines that disappeared contribute nothing to the result.
            continue
        if op == 'equal':
            # Unchanged lines keep the annotation they already had.
            merged += annotated_lines[old_start:old_end]
        elif op == 'insert' or op == 'replace':
            # New or rewritten lines are attributed to the new revision.
            for text in current[new_start:new_end]:
                merged.append((new_history_data, text))
        else:
            raise RuntimeError('Unknown tag %s returned in diff' % op)
    return merged


def annotate_lines(store, commit_id, path, order=ORDER_DATE, lines=None,
                   follow=True):
    """Annotate the lines of a blob.

    :param store: Object store to retrieve objects from
    :param commit_id: Commit id in which to annotate path
    :param path: Path to annotate
    :param order: Order in which to process history (defaults to ORDER_DATE)
    :param lines: Currently unused; any value passed in is discarded and
        annotation always starts from an empty list of lines
    :param follow: Whether to follow changes across renames/copies
    :return: List of ((commit, entry), line) tuples, one per line of the
        resulting blob; commit and entry identify the revision the line
        is attributed to
    """
    walker = Walker(store, include=[commit_id], paths=[path], order=order,
                    follow=follow)
    revs = []
    for log_entry in walker:
        for tree_change in log_entry.changes():
            # Merge commits report a list of lists of changes; wrap the
            # single-change case so both shapes are handled the same way.
            if not isinstance(tree_change, list):
                tree_change = [tree_change]
            for change in tree_change:
                if change.new.path == path:
                    # Keep looking under the name the file had before
                    # this change.
                    path = change.old.path
                    revs.append((log_entry.commit, change.new))
                    break

    # Replay the collected revisions in reverse walk order, carrying
    # annotations of unchanged lines forward at every step.
    annotated = []
    for (commit, entry) in reversed(revs):
        annotated = update_lines(annotated, (commit, entry), store[entry.sha])
    return annotated
2 changes: 2 additions & 0 deletions dulwich/fastexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@


def split_email(text):
    """Split a ``Name <email>`` bytestring into its name and email parts.

    The split happens at the last ``b" <"``; trailing ``b">"`` characters
    are stripped from the email part.

    :param text: Identity bytestring of the form ``b"Name <email>"``
    :return: Tuple of (name, email)
    :raise ValueError: If text does not contain ``b" <"``
    """
    # TODO(jelmer): Dedupe this and the same functionality in
    # format_annotate_line.
    name, address = text.rsplit(b" <", 1)
    address = address.rstrip(b">")
    return (name, address)

Expand Down
2 changes: 1 addition & 1 deletion dulwich/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def unified_diff(a, b, fromfile, tofile, n=3):
Based on the same function in Python2.6.5-rc2's difflib.py
"""
started = False
for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
for group in SequenceMatcher(a=a, b=b).get_grouped_opcodes(n):
if not started:
yield b'--- ' + fromfile + b'\n'
yield b'+++ ' + tofile + b'\n'
Expand Down
23 changes: 22 additions & 1 deletion dulwich/porcelain.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Currently implemented:
* archive
* add
* annotate/blame
* branch{_create,_delete,_list}
* check-ignore
* clone
Expand All @@ -36,7 +37,7 @@
* ls-tree
* pull
* push
* rm
* remove/rm
* remote{_add}
* receive-pack
* reset
Expand Down Expand Up @@ -96,6 +97,7 @@
pretty_format_tree_entry,
)
from dulwich.objectspec import (
parse_commit,
parse_object,
parse_reftuples,
)
Expand Down Expand Up @@ -1131,3 +1133,22 @@ def check_ignore(repo, paths, no_index=False):
continue
if ignore_manager.is_ignored(path):
yield path


def annotate(repo, path, committish=None):
    """Annotate the history of a file.

    :param repo: Path to the repository
    :param path: Path to annotate
    :param committish: Commit id to find path in (defaults to "HEAD")
    :return: Whatever ``dulwich.annotate.annotate_lines`` returns for the
        resolved commit and path
    """
    target = "HEAD" if committish is None else committish
    # Imported here rather than at module level, as in the original code.
    from dulwich.annotate import annotate_lines
    with open_repo_closing(repo) as r:
        resolved = parse_commit(r, target)
        return annotate_lines(r.object_store, resolved.id, path)


blame = annotate
1 change: 1 addition & 0 deletions dulwich/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def run_command(self, name, args):

def self_test_suite():
names = [
'annotate',
'archive',
'blackbox',
'client',
Expand Down
19 changes: 19 additions & 0 deletions dulwich/tests/test_annotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# test_annotate.py -- tests for annotate
# Copyright (C) 2015 Jelmer Vernooij <[email protected]>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) a later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

"""Tests for annotate support."""
2 changes: 1 addition & 1 deletion dulwich/walk.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def changes(self, path_prefix=None):
:return: For commits with up to one parent, a list of TreeChange
objects; if the commit has no parents, these will be relative to
the empty tree. For merge commits, a list of lists of TreeChange
objects; see dulwich.diff.tree_changes_for_merge.
objects; see dulwich.diff_tree.tree_changes_for_merge.
"""
cached = self._changes.get(path_prefix)
if cached is None:
Expand Down