From 89c007d877de1edd3601c24fc0335f5869916b3d Mon Sep 17 00:00:00 2001 From: James Couball Date: Fri, 31 Dec 2021 15:35:43 -0800 Subject: [PATCH] Properly unescape diff paths Signed-off-by: James Couball --- lib/git.rb | 2 + lib/git/base.rb | 2 +- lib/git/diff.rb | 4 +- lib/git/encoding_utils.rb | 33 ++++++++++ lib/git/escaped_path.rb | 77 ++++++++++++++++++++++ lib/git/lib.rb | 32 +-------- tests/units/test_archive.rb | 2 +- tests/units/test_diff_with_escaped_path.rb | 22 +++++++ tests/units/test_escaped_path.rb | 36 ++++++++++ tests/units/test_logger.rb | 39 +++++++---- 10 files changed, 205 insertions(+), 44 deletions(-) create mode 100644 lib/git/encoding_utils.rb create mode 100644 lib/git/escaped_path.rb create mode 100644 tests/units/test_diff_with_escaped_path.rb create mode 100755 tests/units/test_escaped_path.rb diff --git a/lib/git.rb b/lib/git.rb index 6e93957c..4ad1bd97 100644 --- a/lib/git.rb +++ b/lib/git.rb @@ -9,6 +9,8 @@ require 'git/branches' require 'git/config' require 'git/diff' +require 'git/encoding_utils' +require 'git/escaped_path' require 'git/index' require 'git/lib' require 'git/log' diff --git a/lib/git/base.rb b/lib/git/base.rb index 13edf848..815fc36a 100644 --- a/lib/git/base.rb +++ b/lib/git/base.rb @@ -36,7 +36,7 @@ def self.init(directory = '.', options = {}) init_options = { :bare => options[:bare], - :initial_branch => options[:initial_branch], + :initial_branch => options[:initial_branch] } directory = options[:bare] ? options[:repository] : options[:working_directory] diff --git a/lib/git/diff.rb b/lib/git/diff.rb index 06bd3941..d40ddce4 100644 --- a/lib/git/diff.rb +++ b/lib/git/diff.rb @@ -129,8 +129,8 @@ def process_full_diff final = {} current_file = nil @full_diff.split("\n").each do |line| - if m = /^diff --git a\/(.*?) 
b\/(.*?)/.match(line) - current_file = m[1] + if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line) + current_file = Git::EscapedPath.new(m[2]).unescape final[current_file] = defaults.merge({:patch => line, :path => current_file}) else if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line) diff --git a/lib/git/encoding_utils.rb b/lib/git/encoding_utils.rb new file mode 100644 index 00000000..332b5461 --- /dev/null +++ b/lib/git/encoding_utils.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'rchardet' + +module Git + # Method that can be used to detect and normalize string encoding + module EncodingUtils + def self.default_encoding + __ENCODING__.name + end + + def self.best_guess_encoding + # Encoding::ASCII_8BIT.name + Encoding::UTF_8.name + end + + def self.detected_encoding(str) + CharDet.detect(str)['encoding'] || best_guess_encoding + end + + def self.encoding_options + { invalid: :replace, undef: :replace } + end + + def self.normalize_encoding(str) + return str if str.valid_encoding? && str.encoding.name == default_encoding + + return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding? + + str.encode(default_encoding, detected_encoding(str), **encoding_options) + end + end +end diff --git a/lib/git/escaped_path.rb b/lib/git/escaped_path.rb new file mode 100644 index 00000000..7519a3ac --- /dev/null +++ b/lib/git/escaped_path.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module Git + # Represents an escaped Git path string + # + # Git commands that output paths (e.g. ls-files, diff), will escape unusual + # characters in the path with backslashes in the same way C escapes control + # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values + # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8). 
+ # + # @example + # Git::EscapedPath.new('\302\265').unescape # => "µ" + # + class EscapedPath + UNESCAPES = { + 'a' => 0x07, + 'b' => 0x08, + 't' => 0x09, + 'n' => 0x0a, + 'v' => 0x0b, + 'f' => 0x0c, + 'r' => 0x0d, + 'e' => 0x1b, + '\\' => 0x5c, + '"' => 0x22, + "'" => 0x27 + }.freeze + + attr_reader :path + + def initialize(path) + @path = path + end + + # Convert an escaped path to an unescaped path + def unescape + bytes = escaped_path_to_bytes(path) + str = bytes.pack('C*') + str.force_encoding(Encoding::UTF_8) + end + + private + + def extract_octal(path, index) + [path[index + 1..index + 4].to_i(8), 4] + end + + def extract_escape(path, index) + [UNESCAPES[path[index + 1]], 2] + end + + def extract_single_char(path, index) + [path[index].ord, 1] + end + + def next_byte(path, index) + if path[index] == '\\' && path[index + 1] >= '0' && path[index + 1] <= '7' + extract_octal(path, index) + elsif path[index] == '\\' && UNESCAPES.include?(path[index + 1]) + extract_escape(path, index) + else + extract_single_char(path, index) + end + end + + def escaped_path_to_bytes(path) + index = 0 + [].tap do |bytes| + while index < path.length + byte, chars_used = next_byte(path, index) + bytes << byte + index += chars_used + end + end + end + end +end diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 5641e4eb..d892462a 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -1,4 +1,3 @@ -require 'rchardet' require 'tempfile' require 'zlib' @@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block) global_opts = [] global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil? global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil? 
- global_opts << ["-c", "color.ui=false"] + global_opts << %w[-c core.quotePath=true] + global_opts << %w[-c color.ui=false] opts = [opts].flatten.map {|s| escape(s) }.join(' ') @@ -1176,35 +1176,10 @@ def log_path_options(opts) arr_opts end - def default_encoding - __ENCODING__.name - end - - def best_guess_encoding - # Encoding::ASCII_8BIT.name - Encoding::UTF_8.name - end - - def detected_encoding(str) - CharDet.detect(str)['encoding'] || best_guess_encoding - end - - def encoding_options - { invalid: :replace, undef: :replace } - end - - def normalize_encoding(str) - return str if str.valid_encoding? && str.encoding.name == default_encoding - - return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding? - - str.encode(default_encoding, detected_encoding(str), **encoding_options) - end - def run_command(git_cmd, &block) return IO.popen(git_cmd, &block) if block_given? - `#{git_cmd}`.lines.map { |l| normalize_encoding(l) }.join + `#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join end def escape(s) @@ -1225,6 +1200,5 @@ def windows_platform? 
win_platform_regex = /mingw|mswin/ RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex end - end end diff --git a/tests/units/test_archive.rb b/tests/units/test_archive.rb index 0bd0fc1f..3386a27f 100644 --- a/tests/units/test_archive.rb +++ b/tests/units/test_archive.rb @@ -45,7 +45,7 @@ def test_archive f = @git.object('v2.6').archive(tempfile, :format => 'tar', :prefix => 'test/', :path => 'ex_dir/') assert(File.exist?(f)) - + lines = Minitar::Input.open(f).each.to_a.map(&:full_name) assert_match(%r{test/}, lines[1]) assert_match(%r{test/ex_dir/ex\.txt}, lines[3]) diff --git a/tests/units/test_diff_with_escaped_path.rb b/tests/units/test_diff_with_escaped_path.rb new file mode 100644 index 00000000..6387af77 --- /dev/null +++ b/tests/units/test_diff_with_escaped_path.rb @@ -0,0 +1,22 @@ +#!/usr/bin/env ruby +# encoding: utf-8 + +require File.dirname(__FILE__) + '/../test_helper' + +# Test diff when the file path has to be quoted according to core.quotePath +# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath +# +class TestDiffWithEscapedPath < Test::Unit::TestCase + def test_diff_with_non_ascii_filename + in_temp_dir do |path| + create_file('my_other_file_☠', "First Line\n") + `git init` + `git add .` + `git config --local core.safecrlf false` if Gem.win_platform? 
+ `git commit -m "First Commit"` + update_file('my_other_file_☠', "Second Line\n") + diff_paths = Git.open('.').diff.map(&:path) + assert_equal(["my_other_file_☠"], diff_paths) + end + end +end diff --git a/tests/units/test_escaped_path.rb b/tests/units/test_escaped_path.rb new file mode 100755 index 00000000..38230e4f --- /dev/null +++ b/tests/units/test_escaped_path.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "#{File.dirname(__FILE__)}/../test_helper" + +# Test diff when the file path has escapes according to core.quotePath +# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath +# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/ +# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby +# +class TestEscapedPath < Test::Unit::TestCase + def test_simple_path + path = 'my_other_file' + expected_unescaped_path = 'my_other_file' + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end + + def test_unicode_path + path = 'my_other_file_\\342\\230\\240' + expected_unescaped_path = 'my_other_file_☠' + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end + + def test_single_char_escapes + Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char| + path = "\\#{escape_char}" + assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape) + end + end + + def test_compound_escape + path = 'my_other_file_"\\342\\230\\240\\n"' + expected_unescaped_path = "my_other_file_\"☠\n\"" + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end +end diff --git a/tests/units/test_logger.rb b/tests/units/test_logger.rb index 0f72cc95..954c5e0c 100644 --- a/tests/units/test_logger.rb +++ b/tests/units/test_logger.rb @@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase def setup set_file_paths end - + + def missing_log_entry + 'Did not find expected log entry.' 
+ end + + def unexpected_log_entry + 'Unexpected log entry found' + end + def test_logger log = Tempfile.new('logfile') log.close - + logger = Logger.new(log.path) logger.level = Logger::DEBUG - + @git = Git.open(@wdir, :log => logger) @git.branches.size - + logc = File.read(log.path) - assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc)) - assert(/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc)) + expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/ + assert_match(expected_log_entry, logc, missing_log_entry) + + expected_log_entry = /DEBUG -- : cherry/ + assert_match(expected_log_entry, logc, missing_log_entry) + end + + def test_logging_at_info_level_should_not_show_debug_messages log = Tempfile.new('logfile') log.close logger = Logger.new(log.path) logger.level = Logger::INFO - + @git = Git.open(@wdir, :log => logger) @git.branches.size - + logc = File.read(log.path) - assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc)) - assert(!/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc)) + + expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/ + assert_match(expected_log_entry, logc, missing_log_entry) + + expected_log_entry = /DEBUG -- : cherry/ + assert_not_match(expected_log_entry, logc, unexpected_log_entry) end - end pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy