diff --git a/lib/git.rb b/lib/git.rb index 6e93957c..4ad1bd97 100644 --- a/lib/git.rb +++ b/lib/git.rb @@ -9,6 +9,8 @@ require 'git/branches' require 'git/config' require 'git/diff' +require 'git/encoding_utils' +require 'git/escaped_path' require 'git/index' require 'git/lib' require 'git/log' diff --git a/lib/git/base.rb b/lib/git/base.rb index 13edf848..815fc36a 100644 --- a/lib/git/base.rb +++ b/lib/git/base.rb @@ -36,7 +36,7 @@ def self.init(directory = '.', options = {}) init_options = { :bare => options[:bare], - :initial_branch => options[:initial_branch], + :initial_branch => options[:initial_branch] } directory = options[:bare] ? options[:repository] : options[:working_directory] diff --git a/lib/git/diff.rb b/lib/git/diff.rb index 06bd3941..d40ddce4 100644 --- a/lib/git/diff.rb +++ b/lib/git/diff.rb @@ -129,8 +129,8 @@ def process_full_diff final = {} current_file = nil @full_diff.split("\n").each do |line| - if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line) - current_file = m[1] + if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line) + current_file = Git::EscapedPath.new(m[2]).unescape final[current_file] = defaults.merge({:patch => line, :path => current_file}) else if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line) diff --git a/lib/git/encoding_utils.rb b/lib/git/encoding_utils.rb new file mode 100644 index 00000000..332b5461 --- /dev/null +++ b/lib/git/encoding_utils.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'rchardet' + +module Git + # Method that can be used to detect and normalize string encoding + module EncodingUtils + def self.default_encoding + __ENCODING__.name + end + + def self.best_guess_encoding + # Encoding::ASCII_8BIT.name + Encoding::UTF_8.name + end + + def self.detected_encoding(str) + CharDet.detect(str)['encoding'] || best_guess_encoding + end + + def self.encoding_options + { invalid: :replace, undef: :replace } + end + + def self.normalize_encoding(str) + return str if str.valid_encoding? && str.encoding.name == default_encoding + + return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding? + + str.encode(default_encoding, detected_encoding(str), **encoding_options) + end + end +end diff --git a/lib/git/escaped_path.rb b/lib/git/escaped_path.rb new file mode 100644 index 00000000..7519a3ac --- /dev/null +++ b/lib/git/escaped_path.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module Git + # Represents an escaped Git path string + # + # Git commands that output paths (e.g. ls-files, diff), will escape usual + # characters in the path with backslashes in the same way C escapes control + # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values + # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8). + # + # @example + # Git::GitPath.new('\302\265').unescape # => "µ" + # + class EscapedPath + UNESCAPES = { + 'a' => 0x07, + 'b' => 0x08, + 't' => 0x09, + 'n' => 0x0a, + 'v' => 0x0b, + 'f' => 0x0c, + 'r' => 0x0d, + 'e' => 0x1b, + '\\' => 0x5c, + '"' => 0x22, + "'" => 0x27 + }.freeze + + attr_reader :path + + def initialize(path) + @path = path + end + + # Convert an escaped path to an unescaped path + def unescape + bytes = escaped_path_to_bytes(path) + str = bytes.pack('C*') + str.force_encoding(Encoding::UTF_8) + end + + private + + def extract_octal(path, index) + [path[index + 1..index + 4].to_i(8), 4] + end + + def extract_escape(path, index) + [UNESCAPES[path[index + 1]], 2] + end + + def extract_single_char(path, index) + [path[index].ord, 1] + end + + def next_byte(path, index) + if path[index] == '\\' && path[index + 1] >= '0' && path[index + 1] <= '7' + extract_octal(path, index) + elsif path[index] == '\\' && UNESCAPES.include?(path[index + 1]) + extract_escape(path, index) + else + extract_single_char(path, index) + end + end + + def escaped_path_to_bytes(path) + index = 0 + [].tap do |bytes| + while index < path.length + byte, chars_used = next_byte(path, index) + bytes << byte + index += chars_used + end + end + end + end +end diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 5641e4eb..d892462a 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -1,4 +1,3 @@ -require 'rchardet' require 'tempfile' require 'zlib' @@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block) global_opts = [] global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil? global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil? - global_opts << ["-c", "color.ui=false"] + global_opts << %w[-c core.quotePath=true] + global_opts << %w[-c color.ui=false] opts = [opts].flatten.map {|s| escape(s) }.join(' ') @@ -1176,35 +1176,10 @@ def log_path_options(opts) arr_opts end - def default_encoding - __ENCODING__.name - end - - def best_guess_encoding - # Encoding::ASCII_8BIT.name - Encoding::UTF_8.name - end - - def detected_encoding(str) - CharDet.detect(str)['encoding'] || best_guess_encoding - end - - def encoding_options - { invalid: :replace, undef: :replace } - end - - def normalize_encoding(str) - return str if str.valid_encoding? && str.encoding.name == default_encoding - - return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding? - - str.encode(default_encoding, detected_encoding(str), **encoding_options) - end - def run_command(git_cmd, &block) return IO.popen(git_cmd, &block) if block_given? - `#{git_cmd}`.lines.map { |l| normalize_encoding(l) }.join + `#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join end def escape(s) @@ -1225,6 +1200,5 @@ def windows_platform? win_platform_regex = /mingw|mswin/ RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex end - end end diff --git a/tests/units/test_diff_with_escaped_path.rb b/tests/units/test_diff_with_escaped_path.rb new file mode 100644 index 00000000..6387af77 --- /dev/null +++ b/tests/units/test_diff_with_escaped_path.rb @@ -0,0 +1,22 @@ +#!/usr/bin/env ruby +# encoding: utf-8 + +require File.dirname(__FILE__) + '/../test_helper' + +# Test diff when the file path has to be quoted according to core.quotePath +# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath +# +class TestDiffWithEscapedPath < Test::Unit::TestCase + def test_diff_with_non_ascii_filename + in_temp_dir do |path| + create_file('my_other_file_☠', "First Line\n") + `git init` + `git add .` + `git config --local core.safecrlf false` if Gem.win_platform? + `git commit -m "First Commit"` + update_file('my_other_file_☠', "Second Line\n") + diff_paths = Git.open('.').diff.map(&:path) + assert_equal(["my_other_file_☠"], diff_paths) + end + end +end diff --git a/tests/units/test_escaped_path.rb b/tests/units/test_escaped_path.rb new file mode 100755 index 00000000..38230e4f --- /dev/null +++ b/tests/units/test_escaped_path.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "#{File.dirname(__FILE__)}/../test_helper" + +# Test diff when the file path has escapes according to core.quotePath +# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath +# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/ +# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby +# +class TestEscapedPath < Test::Unit::TestCase + def test_simple_path + path = 'my_other_file' + expected_unescaped_path = 'my_other_file' + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end + + def test_unicode_path + path = 'my_other_file_\\342\\230\\240' + expected_unescaped_path = 'my_other_file_☠' + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end + + def test_single_char_escapes + Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char| + path = "\\#{escape_char}" + assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape) + end + end + + def test_compound_escape + path = 'my_other_file_"\\342\\230\\240\\n"' + expected_unescaped_path = "my_other_file_\"☠\n\"" + assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape) + end +end diff --git a/tests/units/test_logger.rb b/tests/units/test_logger.rb index 0f72cc95..954c5e0c 100644 --- a/tests/units/test_logger.rb +++ b/tests/units/test_logger.rb @@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase def setup set_file_paths end - + + def missing_log_entry + 'Did not find expected log entry.' + end + + def unexpected_log_entry + 'Unexpected log entry found' + end + def test_logger log = Tempfile.new('logfile') log.close - + logger = Logger.new(log.path) logger.level = Logger::DEBUG - + @git = Git.open(@wdir, :log => logger) @git.branches.size - + logc = File.read(log.path) - assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc)) - assert(/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc)) + expected_log_entry = /INFO -- : git (?.*?) branch ['"]-a['"]/ + assert_match(expected_log_entry, logc, missing_log_entry) + + expected_log_entry = /DEBUG -- : cherry/ + assert_match(expected_log_entry, logc, missing_log_entry) + end + + def test_logging_at_info_level_should_not_show_debug_messages log = Tempfile.new('logfile') log.close logger = Logger.new(log.path) logger.level = Logger::INFO - + @git = Git.open(@wdir, :log => logger) @git.branches.size - + logc = File.read(log.path) - assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc)) - assert(!/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc)) + + expected_log_entry = /INFO -- : git (?.*?) branch ['"]-a['"]/ + assert_match(expected_log_entry, logc, missing_log_entry) + + expected_log_entry = /DEBUG -- : cherry/ + assert_not_match(expected_log_entry, logc, unexpected_log_entry) end - end