Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: James Couball <jcouball@yahoo.com>
- Loading branch information
Showing
10 changed files
with
205 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'rchardet' | ||
|
||
module Git
  # Methods that can be used to detect and normalize string encoding
  module EncodingUtils
    # The encoding every string is normalized to.
    #
    # @return [String] the name of this source file's encoding (`__ENCODING__`)
    def self.default_encoding
      __ENCODING__.name
    end

    # The encoding assumed when no usable encoding could be detected.
    #
    # @return [String] the name of the UTF-8 encoding
    def self.best_guess_encoding
      # Encoding::ASCII_8BIT.name
      Encoding::UTF_8.name
    end

    # Ask CharDet which encoding +str+ appears to be in.
    #
    # @param str [String] the string to inspect
    # @return [String] the encoding name reported by CharDet, or
    #   {best_guess_encoding} when CharDet reports none
    def self.detected_encoding(str)
      CharDet.detect(str)['encoding'] || best_guess_encoding
    end

    # Options passed to String#encode so that bytes that cannot be converted
    # are replaced instead of raising.
    #
    # @return [Hash] the String#encode options
    def self.encoding_options
      { invalid: :replace, undef: :replace }
    end

    # Return +str+ as a validly encoded string in {default_encoding}.
    #
    # * A valid string already in the default encoding is returned unchanged.
    # * A valid string in another encoding is transcoded from its own encoding.
    # * An invalid string is transcoded from the encoding reported by
    #   {detected_encoding}.
    #
    # @param str [String] the string to normalize
    # @return [String] the normalized string
    def self.normalize_encoding(str)
      return str if str.valid_encoding? && str.encoding.name == default_encoding

      return str.encode(default_encoding, str.encoding, **encoding_options) if str.valid_encoding?

      begin
        str.encode(default_encoding, detected_encoding(str), **encoding_options)
      rescue Encoding::ConverterNotFoundError
        # The detector may report an encoding name that Ruby cannot convert
        # from. Fall back to the best guess so that normalization still
        # returns a usable string instead of raising.
        str.encode(default_encoding, best_guess_encoding, **encoding_options)
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# frozen_string_literal: true | ||
|
||
module Git
  # Represents an escaped Git path string
  #
  # Git commands that output paths (e.g. ls-files, diff), will escape unusual
  # characters in the path with backslashes in the same way C escapes control
  # characters (e.g. \t for TAB, \n for LF, \\ for backslash) or bytes with values
  # larger than 0x80 (e.g. octal \302\265 for "micro" in UTF-8).
  #
  # @example
  #   Git::EscapedPath.new('\302\265').unescape # => "µ"
  #
  class EscapedPath
    # Maps the character following a backslash to the byte it stands for
    UNESCAPES = {
      'a' => 0x07,
      'b' => 0x08,
      't' => 0x09,
      'n' => 0x0a,
      'v' => 0x0b,
      'f' => 0x0c,
      'r' => 0x0d,
      'e' => 0x1b,
      '\\' => 0x5c,
      '"' => 0x22,
      "'" => 0x27
    }.freeze

    # @return [String] the escaped path given to the constructor
    attr_reader :path

    # @param path [String] the escaped path
    def initialize(path)
      @path = path
    end

    # Convert an escaped path to an unescaped path
    #
    # @return [String] the decoded bytes tagged as UTF-8
    def unescape
      bytes = escaped_path_to_bytes(path)
      str = bytes.pack('C*')
      str.force_encoding(Encoding::UTF_8)
    end

    private

    # Decode the octal escape (backslash followed by up to three octal
    # digits) starting at +index+.
    #
    # Only the octal digits themselves are consumed, so a digit that directly
    # follows a three-digit escape is not folded into the byte value.
    #
    # @return [Array(Array<Integer>, Integer)] the decoded bytes and the
    #   number of characters consumed
    def extract_octal(path, index)
      digits = path[index + 1, 3].each_char.take_while { |char| char.between?('0', '7') }.join
      [[digits.to_i(8) & 0xff], digits.length + 1]
    end

    # Decode the single-character escape (e.g. \n) starting at +index+.
    #
    # @return [Array(Array<Integer>, Integer)] the decoded bytes and the
    #   number of characters consumed
    def extract_escape(path, index)
      [[UNESCAPES[path[index + 1]]], 2]
    end

    # Copy the literal character at +index+.
    #
    # All of the character's bytes are returned so that a multi-byte
    # character survives the round trip through pack('C*').
    #
    # @return [Array(Array<Integer>, Integer)] the character's bytes and the
    #   number of characters consumed
    def extract_single_char(path, index)
      [path[index].bytes, 1]
    end

    # Decode whatever starts at +index+: an octal escape, a single-character
    # escape, or a literal character.
    #
    # @return [Array(Array<Integer>, Integer)] the decoded bytes and the
    #   number of characters consumed
    def next_byte(path, index)
      following = path[index + 1]

      # A backslash at the very end of the path has nothing to escape and is
      # treated as a literal character.
      if path[index] == '\\' && following
        return extract_octal(path, index) if following.between?('0', '7')
        return extract_escape(path, index) if UNESCAPES.key?(following)
      end

      extract_single_char(path, index)
    end

    # Walk the escaped path and collect the bytes of the unescaped path.
    #
    # @return [Array<Integer>] the bytes of the unescaped path
    def escaped_path_to_bytes(path)
      index = 0
      [].tap do |bytes|
        while index < path.length
          decoded, chars_used = next_byte(path, index)
          bytes.concat(decoded)
          index += chars_used
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/usr/bin/env ruby | ||
# encoding: utf-8 | ||
|
||
require File.dirname(__FILE__) + '/../test_helper' | ||
|
||
# Test diff when the file path has to be quoted according to core.quotePath | ||
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath | ||
# | ||
class TestDiffWithEscapedPath < Test::Unit::TestCase
  # Commit a file whose name contains a non-ASCII character, modify it, and
  # check that the paths returned by Git.open('.').diff contain the real
  # (unescaped) file name.
  def test_diff_with_non_ascii_filename
    in_temp_dir do |_path|
      file_name = 'my_other_file_☠'

      create_file(file_name, "First Line\n")
      `git init`
      `git add .`
      `git config --local core.safecrlf false` if Gem.win_platform?
      `git commit -m "First Commit"`
      update_file(file_name, "Second Line\n")

      changed_paths = Git.open('.').diff.map { |diff_file| diff_file.path }
      assert_equal([file_name], changed_paths)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env ruby | ||
# frozen_string_literal: true | ||
|
||
require "#{File.dirname(__FILE__)}/../test_helper" | ||
|
||
# Test diff when the file path has escapes according to core.quotePath | ||
# See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath | ||
# See https://www.jvt.me/posts/2020/06/23/byte-array-to-string-ruby/ | ||
# See https://stackoverflow.com/questions/54788845/how-can-i-convert-a-guid-into-a-byte-array-in-ruby | ||
# | ||
class TestEscapedPath < Test::Unit::TestCase
  # A path without any escapes is returned unchanged
  def test_simple_path
    unescaped = Git::EscapedPath.new('my_other_file').unescape
    assert_equal('my_other_file', unescaped)
  end

  # Octal escapes are decoded into the bytes of a UTF-8 character
  def test_unicode_path
    unescaped = Git::EscapedPath.new('my_other_file_\\342\\230\\240').unescape
    assert_equal('my_other_file_☠', unescaped)
  end

  # Every entry in UNESCAPES decodes to the byte it maps to
  def test_single_char_escapes
    Git::EscapedPath::UNESCAPES.each do |escape_char, expected_byte|
      unescaped = Git::EscapedPath.new("\\#{escape_char}").unescape
      assert_equal(expected_byte.chr, unescaped)
    end
  end

  # Octal and single-character escapes can be mixed in one path
  def test_compound_escape
    unescaped = Git::EscapedPath.new('my_other_file_"\\342\\230\\240\\n"').unescape
    assert_equal("my_other_file_\"☠\n\"", unescaped)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters