Skip to content

Commit

Permalink
Add support for parsing author lines with no angle brackets
Browse files Browse the repository at this point in the history
This commit adds support for parsing author lines with no angle brackets.
It uses a regexp to avoid hand-writing some backtracking logic.

This also adds tests for other kinds of brokenness that I found in existing
git repos and that were already supported. Where possible, I used the original
examples as test data.
  • Loading branch information
progval committed Jan 24, 2022
1 parent 6b53d0b commit 3b8a81d
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 14 deletions.
34 changes: 20 additions & 14 deletions dulwich/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import os
import posixpath
import stat
import re
from typing import (
Optional,
Dict,
Expand Down Expand Up @@ -70,6 +71,11 @@

S_IFGITLINK = 0o160000

# Intentionally flexible to support various types of brokenness
_TIME_ENTRY_RE = re.compile(
b"^(?P<person>.*) (?P<time>-?[0-9]+) (?P<timezone>[+-]{0,2}[0-9]+)$"
)


MAX_TIME = 9223372036854775807 # (2**63) - 1 - signed long int max

Expand Down Expand Up @@ -1211,11 +1217,13 @@ def parse_timezone(text):
and a boolean indicating whether this was a UTC timezone
prefixed with a negative sign (-0000).
"""
if text[0] not in b"+-":
# Some (broken) commits do not have a sign
text = b"+" + text

# cgit parses the first character as the sign, and the rest
# as an integer (using strtol), which could also be negative.
# We do the same for compatibility. See #697828.
if not text[0] in b"+-":
raise ValueError("Timezone must start with + or - (%(text)s)" % vars())
sign = text[:1]
offset = int(text[1:])
if sign == b"-":
Expand Down Expand Up @@ -1259,18 +1267,16 @@ def parse_time_entry(value):
field date)
Returns: Tuple of (author, time, (timezone, timezone_neg_utc))
"""
try:
sep = value.rindex(b"> ")
except ValueError:
return (value, None, (None, False))
try:
person = value[0 : sep + 1]
rest = value[sep + 2 :]
timetext, timezonetext = rest.rsplit(b" ", 1)
time = int(timetext)
timezone, timezone_neg_utc = parse_timezone(timezonetext)
except ValueError as e:
raise ObjectFormatException(e)
m = _TIME_ENTRY_RE.match(value)
if not m:
raise ObjectFormatException("foo")

person = m.group("person")
timetext = m.group("time")
timezonetext = m.group("timezone")
time = int(timetext)
timezone, timezone_neg_utc = parse_timezone(timezonetext)

return person, time, (timezone, timezone_neg_utc)


Expand Down
119 changes: 119 additions & 0 deletions dulwich/tests/test_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,103 @@ def test_check_commit_with_overflow_date(self):
with self.assertRaises(ObjectFormatException):
commit.check()

def test_check_commit_with_negative_date(self):
    """An author line with a negative timestamp parses and passes check()."""
    identity = b'Jane Doe <[email protected]>'
    raw_author = identity + b' -12345 +0100'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # Identity, timestamp and timezone offset are all recovered.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, -12345)
    self.assertEqual(parsed.author_timezone, 1 * 60 * 60)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_check_commit_with_double_negative_timezone(self):
    """A '--700' timezone parses as a positive seven-hour offset."""
    identity = b'Jane Doe <[email protected]>'
    raw_author = identity + b' 12345 --700'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # The two minus signs cancel out, giving a positive offset.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, 12345)
    self.assertEqual(parsed.author_timezone, 7 * 60 * 60)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_commit_with_long_timezone(self):
    """A five-digit timezone ('-72000') parses as minus 720 hours."""
    identity = b'Geoff Cant <[email protected]>'
    raw_author = identity + b' 1170648114 -72000'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # The last two digits are minutes; everything before them is hours.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, 1170648114)
    self.assertEqual(parsed.author_timezone, -720 * 60 * 60)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_commit_with_short_timezone(self):
    """A two-digit timezone ('+02') parses as an offset of two minutes."""
    identity = b'Pl\xc3\xa1cidoMonteiro <Pl\xc3\xa1cidoMonteiro@.(none)>'
    raw_author = identity + b' 1380083482 +02'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # With only two digits there is no hour part, hence 2 * 60.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, 1380083482)
    self.assertEqual(parsed.author_timezone, 2 * 60)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_commit_with_unsigned_timezone(self):
    """A timezone without any sign ('0000') parses as a zero offset."""
    identity = b'applehq <applehq@203d044e-caa7-11dc-91ec-67e1038599e7>'
    raw_author = identity + b' 1205785941 0000'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # The missing sign must not prevent the entry from being parsed.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, 1205785941)
    self.assertEqual(parsed.author_timezone, 0)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_commit_with_nonsensical_timezone(self):
    """A '+0575' timezone (5 hours and 75 minutes) parses as +06:15."""
    identity = b'acpmasquerade <[email protected]>'
    raw_author = identity + b' 1460127297 +0575'
    parsed = Commit.from_string(self.make_commit_text(author=raw_author))

    # 5 hours plus 75 minutes adds up to 6 hours and 15 minutes.
    self.assertEqual(parsed.author, identity)
    self.assertEqual(parsed.author_time, 1460127297)
    self.assertEqual(parsed.author_timezone, 6 * 60 * 60 + 15 * 60)

    # Validation is expected to succeed for this commit.
    parsed.check()

def test_mangled_author_line(self):
"""Mangled author line should successfully parse"""
author_line = (
Expand All @@ -722,10 +819,32 @@ def test_mangled_author_line(self):
b'Karl MacMillan <[email protected]> <"Karl MacMillan '
b'<[email protected]>">'
)
expected_time = 1197475547
expected_timezone = -5 * 60 * 60
commit = Commit.from_string(self.make_commit_text(author=author_line))

# The commit parses properly
self.assertEqual(commit.author, expected_identity)
self.assertEqual(commit.author_time, expected_time)
self.assertEqual(commit.author_timezone, expected_timezone)

# But the check fails because the author identity is bogus
with self.assertRaises(ObjectFormatException):
commit.check()

def test_author_line_missing_brackets(self):
author_line = (
b'[email protected] 1297013737 -0500'
)
expected_identity = b'[email protected]'
expected_time = 1297013737
expected_timezone = -5 * 60 * 60
commit = Commit.from_string(self.make_commit_text(author=author_line))

# The commit parses properly
self.assertEqual(commit.author, expected_identity)
self.assertEqual(commit.author_time, expected_time)
self.assertEqual(commit.author_timezone, expected_timezone)

# But the check fails because the author identity is bogus
with self.assertRaises(ObjectFormatException):
Expand Down

0 comments on commit 3b8a81d

Please sign in to comment.