Skip to content

Commit

Permalink
Properly unescape diff paths
Browse files Browse the repository at this point in the history
Signed-off-by: James Couball <jcouball@yahoo.com>
  • Loading branch information
jcouball committed Dec 31, 2021
1 parent ea47044 commit 89c007d
Show file tree
Hide file tree
Showing 10 changed files with 205 additions and 44 deletions.
2 changes: 2 additions & 0 deletions lib/git.rb
Expand Up @@ -9,6 +9,8 @@
require 'git/branches'
require 'git/config'
require 'git/diff'
require 'git/encoding_utils'
require 'git/escaped_path'
require 'git/index'
require 'git/lib'
require 'git/log'
Expand Down
2 changes: 1 addition & 1 deletion lib/git/base.rb
Expand Up @@ -36,7 +36,7 @@ def self.init(directory = '.', options = {})

init_options = {
:bare => options[:bare],
:initial_branch => options[:initial_branch],
:initial_branch => options[:initial_branch]
}

directory = options[:bare] ? options[:repository] : options[:working_directory]
Expand Down
4 changes: 2 additions & 2 deletions lib/git/diff.rb
Expand Up @@ -129,8 +129,8 @@ def process_full_diff
final = {}
current_file = nil
@full_diff.split("\n").each do |line|
if m = /^diff --git a\/(.*?) b\/(.*?)/.match(line)
current_file = m[1]
if m = %r{\Adiff --git ("?)a/(.+?)\1 ("?)b/(.+?)\3\z}.match(line)
current_file = Git::EscapedPath.new(m[2]).unescape
final[current_file] = defaults.merge({:patch => line, :path => current_file})
else
if m = /^index ([0-9a-f]{4,40})\.\.([0-9a-f]{4,40})( ......)*/.match(line)
Expand Down
33 changes: 33 additions & 0 deletions lib/git/encoding_utils.rb
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require 'rchardet'

module Git
  # Helpers for detecting the encoding of a string and converting it to the
  # encoding used by this source file
  module EncodingUtils
    class << self
      # Name of the encoding this source file is written in; it is the target
      # encoding used by {normalize_encoding}
      #
      # @return [String]
      def default_encoding
        __ENCODING__.name
      end

      # Name of the encoding to assume when detection yields no result
      #
      # @return [String]
      def best_guess_encoding
        # Encoding::ASCII_8BIT.name
        Encoding::UTF_8.name
      end

      # Ask CharDet which encoding `str` is in, falling back to
      # {best_guess_encoding} when it reports none
      #
      # @param str [String] the string to inspect
      # @return [String] an encoding name
      def detected_encoding(str)
        guess = CharDet.detect(str)['encoding']
        guess || best_guess_encoding
      end

      # Options handed to String#encode so that invalid or unconvertible
      # characters are replaced
      #
      # @return [Hash]
      def encoding_options
        { invalid: :replace, undef: :replace }
      end

      # Return `str` converted to {default_encoding}
      #
      # A string that is valid and already in the default encoding is returned
      # as is. A valid string in another encoding is transcoded from its own
      # encoding; an invalid one is transcoded from the detected encoding.
      #
      # @param str [String] the string to normalize
      # @return [String]
      def normalize_encoding(str)
        if str.valid_encoding?
          return str if str.encoding.name == default_encoding

          source_encoding = str.encoding
        else
          source_encoding = detected_encoding(str)
        end

        str.encode(default_encoding, source_encoding, **encoding_options)
      end
    end
  end
end
77 changes: 77 additions & 0 deletions lib/git/escaped_path.rb
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module Git
  # Represents an escaped Git path string
  #
  # Git commands that output paths (e.g. ls-files, diff) will escape unusual
  # characters in the path with backslashes in the same way C escapes control
  # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values
  # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8).
  #
  # @example
  #   Git::EscapedPath.new('\302\265').unescape # => "µ"
  #
  class EscapedPath
    # Maps the character that follows a backslash to the byte value it stands for
    UNESCAPES = {
      'a' => 0x07,
      'b' => 0x08,
      't' => 0x09,
      'n' => 0x0a,
      'v' => 0x0b,
      'f' => 0x0c,
      'r' => 0x0d,
      'e' => 0x1b,
      '\\' => 0x5c,
      '"' => 0x22,
      "'" => 0x27
    }.freeze

    # @return [String] the escaped path as passed to the constructor
    attr_reader :path

    # @param path [String] a path containing backslash escapes
    def initialize(path)
      @path = path
    end

    # Convert an escaped path to an unescaped path
    #
    # @return [String] the unescaped bytes, tagged as UTF-8
    def unescape
      bytes = escaped_path_to_bytes(path)
      str = bytes.pack('C*')
      str.force_encoding(Encoding::UTF_8)
    end

    private

    # Decode the octal escape whose backslash is at `index`.
    #
    # At most three digits belong to the escape, so a digit that follows it
    # (e.g. the "1" in \302\2651) is left to be read as an ordinary character.
    #
    # @return [Array(Integer, Integer)] byte value and number of characters consumed
    def extract_octal(path, index)
      digits = path[index + 1, 3][/\A[0-7]+/]
      [digits.to_i(8), digits.length + 1]
    end

    # Decode the single-character escape whose backslash is at `index`.
    #
    # @return [Array(Integer, Integer)] byte value and number of characters consumed
    def extract_escape(path, index)
      [UNESCAPES[path[index + 1]], 2]
    end

    # Take the character at `index` literally.
    #
    # @return [Array(Integer, Integer)] byte value and number of characters consumed
    def extract_single_char(path, index)
      [path[index].ord, 1]
    end

    # Decode whatever starts at `index`: an octal escape, a single-character
    # escape, or a literal character. A backslash that starts neither kind of
    # escape (including one at the very end of the path) is kept literally.
    #
    # @return [Array(Integer, Integer)] byte value and number of characters consumed
    def next_byte(path, index)
      return extract_single_char(path, index) unless path[index] == '\\'

      following = path[index + 1] # nil when the backslash is the last character

      if following&.between?('0', '7')
        extract_octal(path, index)
      elsif UNESCAPES.key?(following)
        extract_escape(path, index)
      else
        extract_single_char(path, index)
      end
    end

    # Walk the path and collect the byte value of every escape and literal.
    #
    # The walk is done over a binary copy so that each element is exactly one
    # byte; an unescaped multi-byte character is then passed through byte by
    # byte instead of being truncated by pack('C*').
    #
    # @return [Array<Integer>]
    def escaped_path_to_bytes(path)
      raw = path.b
      index = 0
      [].tap do |bytes|
        while index < raw.length
          byte, chars_used = next_byte(raw, index)
          bytes << byte
          index += chars_used
        end
      end
    end
  end
end
32 changes: 3 additions & 29 deletions lib/git/lib.rb
@@ -1,4 +1,3 @@
require 'rchardet'
require 'tempfile'
require 'zlib'

Expand Down Expand Up @@ -1085,7 +1084,8 @@ def command(cmd, *opts, &block)
global_opts = []
global_opts << "--git-dir=#{@git_dir}" if !@git_dir.nil?
global_opts << "--work-tree=#{@git_work_dir}" if !@git_work_dir.nil?
global_opts << ["-c", "color.ui=false"]
global_opts << %w[-c core.quotePath=true]
global_opts << %w[-c color.ui=false]

opts = [opts].flatten.map {|s| escape(s) }.join(' ')

Expand Down Expand Up @@ -1176,35 +1176,10 @@ def log_path_options(opts)
arr_opts
end

def default_encoding
__ENCODING__.name
end

def best_guess_encoding
# Encoding::ASCII_8BIT.name
Encoding::UTF_8.name
end

def detected_encoding(str)
CharDet.detect(str)['encoding'] || best_guess_encoding
end

def encoding_options
{ invalid: :replace, undef: :replace }
end

def normalize_encoding(str)
return str if str.valid_encoding? && str.encoding.name == default_encoding

return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?

str.encode(default_encoding, detected_encoding(str), **encoding_options)
end

def run_command(git_cmd, &block)
return IO.popen(git_cmd, &block) if block_given?

`#{git_cmd}`.lines.map { |l| normalize_encoding(l) }.join
`#{git_cmd}`.lines.map { |l| Git::EncodingUtils.normalize_encoding(l) }.join
end

def escape(s)
Expand All @@ -1225,6 +1200,5 @@ def windows_platform?
win_platform_regex = /mingw|mswin/
RUBY_PLATFORM =~ win_platform_regex || RUBY_DESCRIPTION =~ win_platform_regex
end

end
end
2 changes: 1 addition & 1 deletion tests/units/test_archive.rb
Expand Up @@ -45,7 +45,7 @@ def test_archive

f = @git.object('v2.6').archive(tempfile, :format => 'tar', :prefix => 'test/', :path => 'ex_dir/')
assert(File.exist?(f))

lines = Minitar::Input.open(f).each.to_a.map(&:full_name)
assert_match(%r{test/}, lines[1])
assert_match(%r{test/ex_dir/ex\.txt}, lines[3])
Expand Down
22 changes: 22 additions & 0 deletions tests/units/test_diff_with_escaped_path.rb
@@ -0,0 +1,22 @@
#!/usr/bin/env ruby
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'

# Test diff when the file path has to be quoted according to core.quotePath
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
#
class TestDiffWithEscapedPath < Test::Unit::TestCase
  # Commits a file whose name contains a non-ASCII character, modifies it, and
  # checks that the diff reports the file's path in unescaped form.
  def test_diff_with_non_ascii_filename
    file_name = 'my_other_file_☠'

    in_temp_dir do |_path|
      create_file(file_name, "First Line\n")
      `git init`
      `git add .`
      # On Windows only: switch off core.safecrlf in the new repository before committing
      if Gem.win_platform?
        `git config --local core.safecrlf false`
      end
      `git commit -m "First Commit"`
      update_file(file_name, "Second Line\n")

      reported_paths = Git.open('.').diff.map { |diff_file| diff_file.path }
      assert_equal([file_name], reported_paths)
    end
  end
end
36 changes: 36 additions & 0 deletions tests/units/test_escaped_path.rb
@@ -0,0 +1,36 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require "#{File.dirname(__FILE__)}/../test_helper"

# Test unescaping of path strings that Git has escaped according to core.quotePath
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/
# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby
#
class TestEscapedPath < Test::Unit::TestCase
  # A path without any escapes comes back unchanged.
  def test_simple_path
    path = 'my_other_file'
    expected_unescaped_path = 'my_other_file'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Three octal escapes are combined into the one UTF-8 character they encode.
  def test_unicode_path
    path = 'my_other_file_\\342\\230\\240'
    expected_unescaped_path = 'my_other_file_☠'
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end

  # Every entry of UNESCAPES, written after a backslash, yields its mapped byte.
  def test_single_char_escapes
    Git::EscapedPath::UNESCAPES.each_pair do |escape_char, expected_char|
      path = "\\#{escape_char}"
      assert_equal(expected_char.chr, Git::EscapedPath.new(path).unescape)
    end
  end

  # Literal characters, octal escapes and a single-character escape in one path.
  def test_compound_escape
    path = 'my_other_file_"\\342\\230\\240\\n"'
    # \342\230\240 is the UTF-8 encoding of "☠", so it must appear in the result
    expected_unescaped_path = "my_other_file_\"☠\n\""
    assert_equal(expected_unescaped_path, Git::EscapedPath.new(path).unescape)
  end
end
39 changes: 28 additions & 11 deletions tests/units/test_logger.rb
Expand Up @@ -7,32 +7,49 @@ class TestLogger < Test::Unit::TestCase
def setup
set_file_paths
end


def missing_log_entry
'Did not find expected log entry.'
end

def unexpected_log_entry
'Unexpected log entry found'
end

def test_logger
log = Tempfile.new('logfile')
log.close

logger = Logger.new(log.path)
logger.level = Logger::DEBUG

@git = Git.open(@wdir, :log => logger)
@git.branches.size

logc = File.read(log.path)
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
assert(/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))

expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
assert_match(expected_log_entry, logc, missing_log_entry)

expected_log_entry = /DEBUG -- : cherry/
assert_match(expected_log_entry, logc, missing_log_entry)
end

def test_logging_at_info_level_should_not_show_debug_messages
log = Tempfile.new('logfile')
log.close
logger = Logger.new(log.path)
logger.level = Logger::INFO

@git = Git.open(@wdir, :log => logger)
@git.branches.size

logc = File.read(log.path)
assert(/INFO -- : git ['"]--git-dir=[^'"]+['"] ['"]--work-tree=[^'"]+['"] ['"]-c['"] ['"]color.ui=false['"] branch ['"]-a['"]/.match(logc))
assert(!/DEBUG -- : cherry\n diff_over_patches\n\* git_grep/m.match(logc))

expected_log_entry = /INFO -- : git (?<global_options>.*?) branch ['"]-a['"]/
assert_match(expected_log_entry, logc, missing_log_entry)

expected_log_entry = /DEBUG -- : cherry/
assert_not_match(expected_log_entry, logc, unexpected_log_entry)
end

end

0 comments on commit 89c007d

Please sign in to comment.