Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix diffs of files that have quoted paths #504

Merged
merged 1 commit into from Jan 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/git.rb
Expand Up @@ -9,6 +9,8 @@
require 'git/branches'
require 'git/config'
require 'git/diff'
require 'git/encoding_utils'
require 'git/escaped_path'
require 'git/index'
require 'git/lib'
require 'git/log'
Expand Down
2 changes: 1 addition & 1 deletion lib/git/base.rb
Expand Up @@ -36,7 +36,7 @@ def self.init(directory = '.', options = {})

init_options = {
:bare => options[:bare],
:initial_branch => options[:initial_branch],
:initial_branch => options[:initial_branch]
}

directory = options[:bare] ? options[:repository] : options[:working_directory]
Expand Down
4 changes: 2 additions & 2 deletions lib/git/diff.rb
Expand Up @@ -129,8 +129,8 @@ def process_full_diff
final = {}
current_file = nil
@full_diff.split("\n").each do |line|
if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line)
current_file = m[1]
if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line)
current_file = Git::EscapedPath.new(m[2]).unescape
final[current_file] = defaults.merge({:patch => line, :path => current_file})
else
if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line)
Expand Down
33 changes: 33 additions & 0 deletions lib/git/encoding_utils.rb
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require 'rchardet'

module Git
  # Helpers for detecting the encoding of a string and converting it to the
  # default encoding (the encoding of this source file).
  module EncodingUtils
    class << self
      # @return [String] name of the encoding this source file is written in
      def default_encoding
        __ENCODING__.to_s
      end

      # @return [String] encoding name used when detection yields no result
      def best_guess_encoding
        # Alternative left disabled: Encoding::ASCII_8BIT.name
        Encoding::UTF_8.to_s
      end

      # @param str [String] the string to inspect
      # @return [String] the encoding reported by CharDet, or the best guess
      #   when CharDet reports none
      def detected_encoding(str)
        guess = CharDet.detect(str)['encoding']
        guess || best_guess_encoding
      end

      # @return [Hash] options handed to String#encode: invalid and undefined
      #   sequences are replaced
      def encoding_options
        { invalid: :replace, undef: :replace }
      end

      # Return +str+ in the default encoding.
      #
      # A validly encoded string already in the default encoding is returned
      # as-is. Any other validly encoded string is transcoded from its own
      # encoding. A string with an invalid encoding is transcoded from the
      # detected encoding.
      #
      # @param str [String]
      # @return [String]
      def normalize_encoding(str)
        unless str.valid_encoding?
          return str.encode(default_encoding, detected_encoding(str), **encoding_options)
        end

        return str if str.encoding.name == default_encoding

        str.encode(default_encoding, str.encoding, **encoding_options)
      end
    end
  end
end
77 changes: 77 additions & 0 deletions lib/git/escaped_path.rb
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module Git
  # Represents an escaped Git path string
  #
  # Git commands that output paths (e.g. ls-files, diff), will escape unusual
  # characters in the path with backslashes in the same way C escapes control
  # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values
  # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8).
  #
  # @example
  #   Git::EscapedPath.new('\302\265').unescape # => "µ"
  #
  class EscapedPath
    # Maps the character that follows a backslash to the byte it stands for.
    UNESCAPES = {
      'a' => 0x07,
      'b' => 0x08,
      't' => 0x09,
      'n' => 0x0a,
      'v' => 0x0b,
      'f' => 0x0c,
      'r' => 0x0d,
      'e' => 0x1b,
      '\\' => 0x5c,
      '"' => 0x22,
      "'" => 0x27
    }.freeze

    # @return [String] the escaped path given to the constructor
    attr_reader :path

    # @param path [String] the escaped path (without surrounding quotes)
    def initialize(path)
      @path = path
    end

    # Convert an escaped path to an unescaped path
    #
    # @return [String] the decoded bytes, tagged as UTF-8
    def unescape
      bytes = escaped_path_to_bytes(path)
      str = bytes.pack('C*')
      str.force_encoding(Encoding::UTF_8)
    end

    private

    # Decode a backslash followed by three octal digits.
    #
    # Only the three digits are parsed; a digit that follows the escape
    # (e.g. the "1" in '\2401') belongs to the next character.
    #
    # @return [Array(Integer, Integer)] the byte and the characters consumed
    def extract_octal(path, index)
      [path[index + 1..index + 3].to_i(8), 4]
    end

    # Decode a single-character escape listed in UNESCAPES.
    #
    # @return [Array(Integer, Integer)] the byte and the characters consumed
    def extract_escape(path, index)
      [UNESCAPES[path[index + 1]], 2]
    end

    # Take one literal character.
    #
    # All bytes of the character are returned so that a multi-byte character
    # is copied intact instead of being truncated to its low 8 bits.
    #
    # @return [Array(Array<Integer>, Integer)] the bytes and the characters consumed
    def extract_single_char(path, index)
      [path[index].bytes, 1]
    end

    # Decode whatever starts at +index+.
    #
    # @return [Array] the decoded byte (or bytes) and the characters consumed
    def next_byte(path, index)
      # nil when there is no backslash here, or when the backslash is the
      # last character of the path
      escape_char = path[index + 1] if path[index] == '\\'

      if escape_char.nil?
        extract_single_char(path, index)
      elsif escape_char.between?('0', '7')
        extract_octal(path, index)
      elsif UNESCAPES.key?(escape_char)
        extract_escape(path, index)
      else
        extract_single_char(path, index)
      end
    end

    # Walk the escaped path and collect the decoded bytes.
    #
    # @return [Array<Integer>]
    def escaped_path_to_bytes(path)
      index = 0
      [].tap do |bytes|
        while index < path.length
          byte, chars_used = next_byte(path, index)
          bytes.concat(Array(byte))
          index += chars_used
        end
      end
    end
  end
end
32 changes: 3 additions & 29 deletions lib/git/lib.rb
@@ -1,4 +1,3 @@
require 'rchardet'
require 'tempfile'
require 'zlib'

Expand Down Expand Up @@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block)
global_opts = []
global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil?
global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil?
global_opts << ["-c", "color.ui=false"]
global_opts << %w[-c core.quotePath=true]
global_opts << %w[-c color.ui=false]

opts = [opts].flatten.map {|s| escape(s) }.join(' ')

Expand Down Expand Up @@ -1176,35 +1176,10 @@ def log_path_options(opts)
arr_opts
end

# Name of the encoding this source file is written in.
#
# @return [String]
def default_encoding
  __ENCODING__.to_s
end

# Encoding name used when detection yields no result.
#
# @return [String]
def best_guess_encoding
  # Alternative left disabled: Encoding::ASCII_8BIT.name
  Encoding::UTF_8.to_s
end

# Ask CharDet for the encoding of +str+; fall back to the best guess when
# CharDet reports none.
#
# @param str [String]
# @return [String] an encoding name
def detected_encoding(str)
  guess = CharDet.detect(str)['encoding']
  guess || best_guess_encoding
end

# Options passed to String#encode: both invalid and undefined sequences are
# replaced.
#
# @return [Hash{Symbol => Symbol}]
def encoding_options
  %i[invalid undef].each_with_object({}) do |problem, options|
    options[problem] = :replace
  end
end

# Return +str+ in the default encoding.
#
# A validly encoded string already in the default encoding is returned as-is.
# Any other validly encoded string is transcoded from its own encoding. A
# string with an invalid encoding is transcoded from the detected encoding.
#
# @param str [String]
# @return [String]
def normalize_encoding(str)
  target = default_encoding

  if str.valid_encoding?
    return str if str.encoding.name == target

    source = str.encoding
  else
    source = detected_encoding(str)
  end

  str.encode(target, source, **encoding_options)
end

# Run a git command line.
#
# With a block, the command is opened with IO.popen and the block is handed
# to it. Without a block, the command is run once and its output is returned
# with every line normalized by Git::EncodingUtils.normalize_encoding.
#
# @param git_cmd [String] the complete command line to execute
# @return [String, Object] the normalized output, or the result of IO.popen
def run_command(git_cmd, &block)
  return IO.popen(git_cmd, &block) if block_given?

  # Execute exactly once: running the command a second time would repeat its
  # side effects.
  `#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join
end

def escape(s)
Expand All @@ -1225,6 +1200,5 @@ def windows_platform?
win_platform_regex = /mingw|mswin/
RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex
end

end
end
2 changes: 1 addition & 1 deletion tests/units/test_archive.rb
Expand Up @@ -45,7 +45,7 @@ def test_archive

f = @git.object('v2.6').archive(tempfile, :format => 'tar', :prefix => 'test/', :path => 'ex_dir/')
assert(File.exist?(f))

lines = Minitar::Input.open(f).each.to_a.map(&:full_name)
assert_match(%r{test/}, lines[1])
assert_match(%r{test/ex_dir/ex\.txt}, lines[3])
Expand Down
22 changes: 22 additions & 0 deletions tests/units/test_diff_with_escaped_path.rb
@@ -0,0 +1,22 @@
#!/usr/bin/env ruby
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'

# Test diff when the file path has to be quoted according to core.quotePath
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
#
class TestDiffWithEscapedPath < Test::Unit::TestCase
  # The diff of a modified file whose name contains a non-ASCII character
  # should report the path in its unescaped form.
  def test_diff_with_non_ascii_filename
    in_temp_dir do |_path|
      file_name = 'my_other_file_☠'

      create_file(file_name, "First Line\n")
      `git init`
      `git add .`
      `git config --local core.safecrlf false` if Gem.win_platform?
      `git commit -m "First Commit"`
      update_file(file_name, "Second Line\n")

      changed_paths = Git.open('.').diff.map { |diff_file| diff_file.path }
      assert_equal([file_name], changed_paths)
    end
  end
end
36 changes: 36 additions & 0 deletions tests/units/test_escaped_path.rb
@@ -0,0 +1,36 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require "#{File.dirname(__FILE__)}/../test_helper"

# Test diff when the file path has escapes according to core.quotePath
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/
# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby
#
class TestEscapedPath < Test::Unit::TestCase
  # A path without any escape sequence is returned unchanged.
  def test_simple_path
    path = 'my_other_file'
    expected_unescaped_path = 'my_other_file'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Three octal escapes decode to the three UTF-8 bytes of one character.
  def test_unicode_path
    path = 'my_other_file_\\342\\230\\240'
    expected_unescaped_path = 'my_other_file_☠'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Every entry of UNESCAPES decodes to the character with the mapped byte.
  def test_single_char_escapes
    Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char|
      path = "\\#{escape_char}"
      assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape)
    end
  end

  # Literal quotes, octal escapes and a single-character escape in one path.
  def test_compound_escape
    path = 'my_other_file_"\\342\\230\\240\\n"'
    # \342\230\240 decodes to "☠", so it must appear in the expected value
    expected_unescaped_path = "my_other_file_\"☠\n\""
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end
end
39 changes: 28 additions & 11 deletions tests/units/test_logger.rb
Expand Up @@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase
# Test fixture hook: delegates to set_file_paths, which is defined outside
# this view.
def setup
  set_file_paths
end


# Failure message used when an expected log entry is absent.
#
# @return [String]
def missing_log_entry
  "Did not find expected log entry."
end

# Failure message used when a log entry is present that should not be.
#
# @return [String]
def unexpected_log_entry
  "Unexpected log entry found"
end

# With the logger at DEBUG level, listing branches should log both the INFO
# entry for the git command and a DEBUG entry containing its output.
def test_logger
  log = Tempfile.new('logfile')
  log.close

  logger = Logger.new(log.path)
  logger.level = Logger::DEBUG

  @git = Git.open(@wdir, :log => logger)
  @git.branches.size

  logc = File.read(log.path)

  # The global options are matched loosely so the test does not depend on
  # the exact list of options passed to git.
  expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
  assert_match(expected_log_entry, logc, missing_log_entry)

  expected_log_entry = /DEBUG -- : cherry/
  assert_match(expected_log_entry, logc, missing_log_entry)
end

# With the logger at INFO level, listing branches should log the INFO entry
# for the git command but no DEBUG entry.
def test_logging_at_info_level_should_not_show_debug_messages
  log = Tempfile.new('logfile')
  log.close
  logger = Logger.new(log.path)
  logger.level = Logger::INFO

  @git = Git.open(@wdir, :log => logger)
  @git.branches.size

  logc = File.read(log.path)

  # The global options are matched loosely so the test does not depend on
  # the exact list of options passed to git.
  expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
  assert_match(expected_log_entry, logc, missing_log_entry)

  expected_log_entry = /DEBUG -- : cherry/
  assert_not_match(expected_log_entry, logc, unexpected_log_entry)
end

end