Skip to content

Commit ab5aa82

Browse files
committed
Properly unescape diff paths
1 parent ea47044 commit ab5aa82

File tree

9 files changed

+204
-43
lines changed

9 files changed

+204
-43
lines changed

lib/git.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
require 'git/branches'
1010
require 'git/config'
1111
require 'git/diff'
12+
require 'git/encoding_utils'
13+
require 'git/escaped_path'
1214
require 'git/index'
1315
require 'git/lib'
1416
require 'git/log'

lib/git/base.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def self.init(directory = '.', options = {})
3636

3737
init_options = {
3838
:bare => options[:bare],
39-
:initial_branch => options[:initial_branch],
39+
:initial_branch => options[:initial_branch]
4040
}
4141

4242
directory = options[:bare] ? options[:repository] : options[:working_directory]

lib/git/diff.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ def process_full_diff
129129
final = {}
130130
current_file = nil
131131
@full_diff.split("\n").each do |line|
132-
if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line)
133-
current_file = m[1]
132+
if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line)
133+
current_file = Git::EscapedPath.new(m[2]).unescape
134134
final[current_file] = defaults.merge({:patch => line, :path => current_file})
135135
else
136136
if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line)

lib/git/encoding_utils.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# frozen_string_literal: true
2+
3+
# The gem's entry file is lowercase; a differently-cased name fails to load on
# case-sensitive filesystems.
require 'rchardet'

module Git
  # Methods that can be used to detect and normalize string encoding
  module EncodingUtils
    # @return [String] name of the encoding of this source file; the target
    #   encoding used by {normalize_encoding}
    def self.default_encoding
      __ENCODING__.name
    end

    # @return [String] encoding name assumed when detection yields nothing
    def self.best_guess_encoding
      # Encoding::ASCII_8BIT.name
      Encoding::UTF_8.name
    end

    # @param str [String] the string whose encoding should be guessed
    # @return [String] the encoding name reported by CharDet, or
    #   {best_guess_encoding} when CharDet reports none
    def self.detected_encoding(str)
      CharDet.detect(str)['encoding'] || best_guess_encoding
    end

    # @return [Hash] options passed to String#encode so that invalid or
    #   undefined characters are replaced instead of raising
    def self.encoding_options
      { invalid: :replace, undef: :replace }
    end

    # Convert a string to the default encoding
    #
    # @param str [String] the string to normalize
    # @return [String] `str` itself when it is already valid in the default
    #   encoding, otherwise a transcoded copy
    def self.normalize_encoding(str)
      return str if str.valid_encoding? && str.encoding.name == default_encoding

      # Valid in its declared encoding: transcode from that encoding.
      return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?

      # Declared encoding is wrong: transcode from the detected encoding instead.
      str.encode(default_encoding, detected_encoding(str), **encoding_options)
    end
  end
end

lib/git/escaped_path.rb

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# frozen_string_literal: true
2+
3+
module Git
  # Represents an escaped Git path string
  #
  # Git commands that output paths (e.g. ls-files, diff) will escape unusual
  # characters in the path with backslashes in the same way C escapes control
  # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values
  # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8).
  #
  # @example
  #   Git::EscapedPath.new('\302\265').unescape # => "µ"
  #
  class EscapedPath
    # Maps the character that follows a backslash to the byte it stands for
    UNESCAPES = {
      'a' => 0x07,
      'b' => 0x08,
      't' => 0x09,
      'n' => 0x0a,
      'v' => 0x0b,
      'f' => 0x0c,
      'r' => 0x0d,
      'e' => 0x1b,
      '\\' => 0x5c,
      '"' => 0x22,
      "'" => 0x27
    }.freeze

    # @return [String] the escaped path exactly as it was passed to the constructor
    attr_reader :path

    # @param path [String] a path that may contain backslash escapes
    def initialize(path)
      @path = path
    end

    # Convert an escaped path to an unescaped path
    #
    # @return [String] the decoded bytes, tagged as UTF-8
    def unescape
      escaped_path_to_bytes(path).pack('C*').force_encoding(Encoding::UTF_8)
    end

    private

    # @return [Boolean] true when char is one of '0'..'7' (false for nil)
    def octal_digit?(char)
      char ? char.between?('0', '7') : false
    end

    # Decode an octal escape (\NNN) starting at index.
    #
    # Only the (at most three) octal digits that follow the backslash belong to
    # the escape; a digit after them is an ordinary path character.
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def extract_octal(path, index)
      digits = path[index + 1, 3].each_char.take_while { |char| octal_digit?(char) }.join
      [[digits.to_i(8)], digits.length + 1]
    end

    # Decode a single-character escape (e.g. \n) starting at index.
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def extract_escape(path, index)
      [[UNESCAPES.fetch(path[index + 1])], 2]
    end

    # Copy the character at index verbatim.
    #
    # All bytes of the character are kept so that a raw multi-byte character
    # is not truncated to a single byte.
    #
    # @return [Array(Array<Integer>, Integer)] the character's bytes and characters consumed
    def extract_single_char(path, index)
      [path[index].bytes, 1]
    end

    # Decode whatever starts at index: an octal escape, a single-character
    # escape, or a literal character.
    #
    # A backslash that is last in the path or followed by an unknown character
    # is treated as a literal backslash.
    #
    # @return [Array(Array<Integer>, Integer)] decoded bytes and characters consumed
    def next_bytes(path, index)
      escaped = path[index + 1] if path[index] == '\\'

      if octal_digit?(escaped)
        extract_octal(path, index)
      elsif UNESCAPES.key?(escaped)
        extract_escape(path, index)
      else
        extract_single_char(path, index)
      end
    end

    # Walk the path and collect the decoded bytes.
    #
    # The stride varies per token, hence the explicit index loop.
    #
    # @return [Array<Integer>] the bytes of the unescaped path
    def escaped_path_to_bytes(path)
      index = 0
      [].tap do |bytes|
        while index < path.length
          new_bytes, chars_used = next_bytes(path, index)
          bytes.concat(new_bytes)
          index += chars_used
        end
      end
    end
  end
end

lib/git/lib.rb

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
require 'rchardet'
21
require 'tempfile'
32
require 'zlib'
43

@@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block)
10851084
global_opts = []
10861085
global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil?
10871086
global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil?
1088-
global_opts << ["-c", "color.ui=false"]
1087+
global_opts << %w[-c core.quotePath=true]
1088+
global_opts << %w[-c color.ui=false]
10891089

10901090
opts = [opts].flatten.map {|s| escape(s) }.join(' ')
10911091

@@ -1176,35 +1176,10 @@ def log_path_options(opts)
11761176
arr_opts
11771177
end
11781178

1179-
def default_encoding
1180-
__ENCODING__.name
1181-
end
1182-
1183-
def best_guess_encoding
1184-
# Encoding::ASCII_8BIT.name
1185-
Encoding::UTF_8.name
1186-
end
1187-
1188-
def detected_encoding(str)
1189-
CharDet.detect(str)['encoding'] || best_guess_encoding
1190-
end
1191-
1192-
def encoding_options
1193-
{ invalid: :replace, undef: :replace }
1194-
end
1195-
1196-
def normalize_encoding(str)
1197-
return str if str.valid_encoding? && str.encoding.name == default_encoding
1198-
1199-
return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?
1200-
1201-
str.encode(default_encoding, detected_encoding(str), **encoding_options)
1202-
end
1203-
12041179
def run_command(git_cmd, &block)
12051180
return IO.popen(git_cmd, &block) if block_given?
12061181

1207-
`#{git_cmd}`.lines.map { |l| normalize_encoding(l) }.join
1182+
`#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join
12081183
end
12091184

12101185
def escape(s)
@@ -1225,6 +1200,5 @@ def windows_platform?
12251200
win_platform_regex = /mingw|mswin/
12261201
RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex
12271202
end
1228-
12291203
end
12301204
end
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env ruby
2+
# encoding: utf-8
3+
4+
require File.dirname(__FILE__) + '/../test_helper'
5+
6+
# Test diff when the file path has to be quoted according to core.quotePath
7+
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
8+
#
9+
class TestDiffWithEscapedPath < Test::Unit::TestCase
  # Commits a file whose name contains a non-ASCII character, modifies it, and
  # checks that the diff reports the path unescaped.
  def test_diff_with_non_ascii_filename
    file_name = 'my_other_file_☠'

    in_temp_dir do |_path|
      create_file(file_name, "First Line\n")
      `git init`
      `git add .`
      `git config --local core.safecrlf false` if Gem.win_platform?
      `git commit -m "First Commit"`
      update_file(file_name, "Second Line\n")

      changed_paths = Git.open('.').diff.map { |diff_file| diff_file.path }
      assert_equal([file_name], changed_paths)
    end
  end
end

tests/units/test_escaped_path.rb

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
require "#{File.dirname(__FILE__)}/../test_helper"
5+
6+
# Test diff when the file path has escapes according to core.quotePath
7+
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
8+
# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/
9+
# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby
10+
#
11+
class TestEscapedPath < Test::Unit::TestCase
  # A path without escapes is returned unchanged
  def test_simple_path
    path = 'my_other_file'
    expected_unescaped_path = 'my_other_file'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Octal escapes are decoded to the bytes of a UTF-8 character
  def test_unicode_path
    path = 'my_other_file_\\342\\230\\240'
    expected_unescaped_path = 'my_other_file_☠'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Every single-character escape maps to the byte listed in UNESCAPES
  def test_single_char_escapes
    Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char|
      path = "\\#{escape_char}"
      assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape)
    end
  end

  # Octal and single-character escapes can be mixed with literal characters.
  # The octal sequence \342\230\240 in the input decodes to "☠", so it must
  # appear in the expected value.
  def test_compound_escape
    path = 'my_other_file_"\\342\\230\\240\\n"'
    expected_unescaped_path = "my_other_file_\"☠\n\""
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end
end

tests/units/test_logger.rb

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase
77
def setup
88
set_file_paths
99
end
10-
10+
11+
def missing_log_entry
12+
'Did not find expected log entry.'
13+
end
14+
15+
def unexpected_log_entry
16+
'Unexpected log entry found'
17+
end
18+
1119
def test_logger
1220
log = Tempfile.new('logfile')
1321
log.close
14-
22+
1523
logger = Logger.new(log.path)
1624
logger.level = Logger::DEBUG
17-
25+
1826
@git = Git.open(@wdir, :log => logger)
1927
@git.branches.size
20-
28+
2129
logc = File.read(log.path)
22-
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
23-
assert(/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))
2430

31+
expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
32+
assert_match(expected_log_entry, logc, missing_log_entry)
33+
34+
expected_log_entry = /DEBUG -- : cherry/
35+
assert_match(expected_log_entry, logc, missing_log_entry)
36+
end
37+
38+
def test_logging_at_info_level_should_not_show_debug_messages
2539
log = Tempfile.new('logfile')
2640
log.close
2741
logger = Logger.new(log.path)
2842
logger.level = Logger::INFO
29-
43+
3044
@git = Git.open(@wdir, :log => logger)
3145
@git.branches.size
32-
46+
3347
logc = File.read(log.path)
34-
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
35-
assert(!/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))
48+
49+
expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
50+
assert_match(expected_log_entry, logc, missing_log_entry)
51+
52+
expected_log_entry = /DEBUG -- : cherry/
53+
assert_not_match(expected_log_entry, logc, unexpected_log_entry)
3654
end
37-
3855
end

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy