Skip to content

Commit 8bbf1a3

Browse files
committed
Merge pull request #412 from nvie/fix-diff-patch-parsing
Fix diff patch parsing
2 parents 0d7a40f + 1445b59 commit 8bbf1a3

File tree

5 files changed

+58
-19
lines changed

5 files changed

+58
-19
lines changed

doc/source/changes.rst

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,23 @@ Changelog
55
1.0.3 - Fixes
66
=============
77

8-
* `Commit.diff()` now supports diffing the root commit via `Commit.diff(NULL_TREE)`.
9-
* `Repo.blame()` now respects `incremental=True`, supporting incremental blames. Incremental blames are slightly faster since they don't include the file's contents in them.
10-
* IMPORTANT: This release drops support for python 2.6, which is officially deprecated by the python maintainers.
8+
* `Commit.diff()` now supports diffing the root commit via
9+
`Commit.diff(NULL_TREE)`.
10+
* `Repo.blame()` now respects `incremental=True`, supporting incremental
11+
blames. Incremental blames are slightly faster since they don't include
12+
the file's contents in them.
13+
* Fix: `Diff` objects created with patch output will now have their
14+
`a_path` and `b_path` properties parsed out correctly. Previously, some
15+
values may have been populated incorrectly when a file was added or
16+
deleted.
17+
* IMPORTANT: This release drops support for python 2.6, which is
18+
officially deprecated by the python maintainers.
19+
* CRITICAL: `Diff` objects created with patch output will now not carry
20+
the --- and +++ header lines anymore. All diffs now start with the
21+
@@ header line directly. Users that rely on the old behaviour can now
22+
(reliably) read this information from the a_path and b_path properties
23+
without having to parse these lines manually.
24+
1125

1226
1.0.2 - Fixes
1327
=============

git/diff.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -198,16 +198,18 @@ class Diff(object):
198198
# precompiled regex
199199
re_header = re.compile(r"""
200200
^diff[ ]--git
201-
[ ](?:a/)?(?P<a_path>.+?)[ ](?:b/)?(?P<b_path>.+?)\n
202-
(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
203-
^rename[ ]from[ ](?P<rename_from>\S+)\n
204-
^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
201+
[ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
205202
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
206203
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
204+
(?:^similarity[ ]index[ ]\d+%\n
205+
^rename[ ]from[ ](?P<rename_from>.*)\n
206+
^rename[ ]to[ ](?P<rename_to>.*)(?:\n|$))?
207207
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
208208
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
209209
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
210210
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
211+
(?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
212+
(?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
211213
""".encode('ascii'), re.VERBOSE | re.MULTILINE)
212214
# can be used for comparisons
213215
NULL_HEX_SHA = "0" * 40
@@ -231,15 +233,14 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
231233
if self.b_mode:
232234
self.b_mode = mode_str_to_int(self.b_mode)
233235

234-
if a_blob_id is None:
236+
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
235237
self.a_blob = None
236238
else:
237-
assert self.a_mode is not None
238239
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
239-
if b_blob_id is None:
240+
241+
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
240242
self.b_blob = None
241243
else:
242-
assert self.b_mode is not None
243244
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
244245

245246
self.new_file = new_file
@@ -329,11 +330,23 @@ def _index_from_patch_format(cls, repo, stream):
329330
index = DiffIndex()
330331
previous_header = None
331332
for header in cls.re_header.finditer(text):
332-
a_path, b_path, similarity_index, rename_from, rename_to, \
333-
old_mode, new_mode, new_file_mode, deleted_file_mode, \
334-
a_blob_id, b_blob_id, b_mode = header.groups()
333+
a_path_fallback, b_path_fallback, \
334+
old_mode, new_mode, \
335+
rename_from, rename_to, \
336+
new_file_mode, deleted_file_mode, \
337+
a_blob_id, b_blob_id, b_mode, \
338+
a_path, b_path = header.groups()
335339
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
336340

341+
a_path = a_path or rename_from or a_path_fallback
342+
b_path = b_path or rename_to or b_path_fallback
343+
344+
if a_path == b'/dev/null':
345+
a_path = None
346+
347+
if b_path == b'/dev/null':
348+
b_path = None
349+
337350
# Our only means to find the actual text is to see what has not been matched by our regex,
338351
# and then retro-actively assin it to our index
339352
if previous_header is not None:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
diff --git a/file with spaces b/file with spaces
2+
new file mode 100644
3+
index 0000000000000000000000000000000000000000..75c620d7b0d3b0100415421a97f553c979d75174
4+
--- /dev/null
5+
+++ b/file with spaces
6+
@@ -0,0 +1 @@
7+
+ohai

git/test/fixtures/diff_initial

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
--- /dev/null
2-
+++ b/CHANGES
31
@@ -0,0 +1,7 @@
42
+=======
53
+CHANGES

git/test/test_diff.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_list_from_string_new_mode(self):
7676
self._assert_diff_format(diffs)
7777

7878
assert_equal(1, len(diffs))
79-
assert_equal(10, len(diffs[0].diff.splitlines()))
79+
assert_equal(8, len(diffs[0].diff.splitlines()))
8080

8181
def test_diff_with_rename(self):
8282
output = StringProcessAdapter(fixture('diff_rename'))
@@ -116,7 +116,7 @@ def test_diff_index(self):
116116
res = Diff._index_from_patch_format(None, output.stdout)
117117
assert len(res) == 6
118118
for dr in res:
119-
assert dr.diff
119+
assert dr.diff.startswith(b'@@')
120120
assert str(dr), "Diff to string conversion should be possible"
121121
# end for each diff
122122

@@ -140,7 +140,8 @@ def test_diff_initial_commit(self):
140140

141141
# ...and with creating a patch
142142
diff_index = initial_commit.diff(NULL_TREE, create_patch=True)
143-
assert diff_index[0].b_path == 'CHANGES'
143+
assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
144+
assert diff_index[0].b_path == 'CHANGES', repr(diff_index[0].b_path)
144145
assert diff_index[0].new_file
145146
assert diff_index[0].diff == fixture('diff_initial')
146147

@@ -156,6 +157,12 @@ def test_diff_patch_format(self):
156157
Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
157158
# END for each fixture
158159

160+
def test_diff_with_spaces(self):
161+
data = StringProcessAdapter(fixture('diff_file_with_spaces'))
162+
diff_index = Diff._index_from_patch_format(self.rorepo, data.stdout)
163+
assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
164+
assert diff_index[0].b_path == u'file with spaces', repr(diff_index[0].b_path)
165+
159166
def test_diff_interface(self):
160167
# test a few variations of the main diff routine
161168
assertion_map = dict()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy