From c96ebede87d37544ffde2dd2fc35eb5f44690989 Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 15:29:45 +0000 Subject: [PATCH 1/8] Introduce example repo with a signed commit Git::Lib#commit_data currently produces a malformed data hash for a commit containing a gitsig header field, with the majority of the PGP signature being considered part of the message. I'd like to fix this, so this introduces a new example repo with a single signed commit in it. Signed-off-by: Simon Coffey --- tests/files/signed_commits/dot_git/HEAD | 1 + tests/files/signed_commits/dot_git/config | 7 +++++++ tests/files/signed_commits/dot_git/index | Bin 0 -> 137 bytes tests/files/signed_commits/dot_git/logs/HEAD | 1 + .../signed_commits/dot_git/logs/refs/heads/main | 1 + .../4a/7f2717e7c6b029dcadf62aac0e5e811332f39d | Bin 0 -> 71 bytes .../92/fd5b7c1aeb6a4e2602799f01608b3deebbad2d | Bin 0 -> 54 bytes .../a0/43c677c93d9f2b285771a29742cc73715e41ea | Bin 0 -> 408 bytes .../files/signed_commits/dot_git/refs/heads/main | 1 + tests/files/signed_commits/notes.txt | 1 + 10 files changed, 12 insertions(+) create mode 100644 tests/files/signed_commits/dot_git/HEAD create mode 100644 tests/files/signed_commits/dot_git/config create mode 100644 tests/files/signed_commits/dot_git/index create mode 100644 tests/files/signed_commits/dot_git/logs/HEAD create mode 100644 tests/files/signed_commits/dot_git/logs/refs/heads/main create mode 100644 tests/files/signed_commits/dot_git/objects/4a/7f2717e7c6b029dcadf62aac0e5e811332f39d create mode 100644 tests/files/signed_commits/dot_git/objects/92/fd5b7c1aeb6a4e2602799f01608b3deebbad2d create mode 100644 tests/files/signed_commits/dot_git/objects/a0/43c677c93d9f2b285771a29742cc73715e41ea create mode 100644 tests/files/signed_commits/dot_git/refs/heads/main create mode 100644 tests/files/signed_commits/notes.txt diff --git a/tests/files/signed_commits/dot_git/HEAD b/tests/files/signed_commits/dot_git/HEAD new file mode 100644 index 00000000..b870d826 --- /dev/null +++ b/tests/files/signed_commits/dot_git/HEAD @@ -0,0 +1 @@ +ref: refs/heads/main diff --git a/tests/files/signed_commits/dot_git/config b/tests/files/signed_commits/dot_git/config new file mode 100644 index 00000000..6c9406b7 --- /dev/null +++ b/tests/files/signed_commits/dot_git/config @@ -0,0 +1,7 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + ignorecase = true + precomposeunicode = true diff --git a/tests/files/signed_commits/dot_git/index b/tests/files/signed_commits/dot_git/index new file mode 100644 index 0000000000000000000000000000000000000000..8ad0865841685ba837df95fe264c30863b3f3e58 GIT binary patch literal 137 zcmZ?q402{*U|<4b#^j?WPuOK_)nGIu1A`#b^gUA<7#f!VrN05yhybyTSG~IU^J5z{ z@2vf%wT3UQQP}A7Tn5g({F2mSy^@L&hL9jvSD-pc215k{u1SBRYouOh`Kd8g&Sy;M gwtcsIt!_=PWcB-B_io){y 1673868871 +0000 commit (initial): Signed commit diff --git a/tests/files/signed_commits/dot_git/logs/refs/heads/main b/tests/files/signed_commits/dot_git/logs/refs/heads/main new file mode 100644 index 00000000..130aef50 --- /dev/null +++ b/tests/files/signed_commits/dot_git/logs/refs/heads/main @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 a043c677c93d9f2b285771a29742cc73715e41ea Simon Coffey 1673868871 +0000 commit (initial): Signed commit diff --git a/tests/files/signed_commits/dot_git/objects/4a/7f2717e7c6b029dcadf62aac0e5e811332f39d b/tests/files/signed_commits/dot_git/objects/4a/7f2717e7c6b029dcadf62aac0e5e811332f39d new file mode 100644 index 0000000000000000000000000000000000000000..574632315854b3639844161447bf3b65c4b2ad28 GIT binary patch literal 71 zcmV-N0J#5n0ZYosPf{>5V8|>{$ShVUOD(EY$jmLsFDgmQD^V!PNGwrE&PdElPc2p` d$p`X*YSJ=uQWX-5QbCF{(=t<2xBxVj7#Q|k8&v=R literal 0 HcmV?d00001 diff --git a/tests/files/signed_commits/dot_git/objects/92/fd5b7c1aeb6a4e2602799f01608b3deebbad2d b/tests/files/signed_commits/dot_git/objects/92/fd5b7c1aeb6a4e2602799f01608b3deebbad2d new file mode 100644 index 0000000000000000000000000000000000000000..dedfeed897b03c35b2d2c3a49d2ec1487358f774 GIT binary patch literal 54 zcmV-60LlM&0V^p=O;s>9XD~D{Ff%bx$jdKDE!HckC}HrbR~LVNY=h>VwcoVX@WnL> M8-1P&08nQTO0hI{s}m|G91Y(5|)QnzZ7h5$sQZc=FQu>77C`G^FeitfoBzTN@lr<8FBE0S&m`NJg+cJ$8d>F86yN! ziz3{svh5%hb=yGL76lvNv%GcV{N8;ndewKVVz|3W^%pRmne95RJ2PQq=t5f6!xj5a z48L$GIDEQQ5XH+4Y~wAY%Xs7Ovt4x85g-tYEb@cnlOGt*%VbxJNKwxW&Lp_x{9p~? zYIJB;uMZ;0!lRp6IrpE!UHGnR0yz!-cH+{QwsZcxwt~~a4x5@0dN@U=eVC}ZmBiUV zR&rRCWqytPD|z+(SlP!#ks|2(^=9Fng_r)Iqla)YTDyrwx8JApNo8~Xh~Uxo_YHd6 zCfa=a*Dfd(ykQjH?uXF66cVnYssLA2dxe^TYC$onep`TIg@OH49nO`|ffYJBeX~Ch zIY!HmHPrYl8V;&t8X_g;`t2(vD$(4T`k#T+v1$ue@1DQNLZVB@B Date: Mon, 16 Jan 2023 15:35:23 +0000 Subject: [PATCH 2/8] Fix parsing of multiline gpgsig commit header field Git::Lib#commit_data currently misparses commits containing a multiline gpgsig header, extracting only the first line as the gpgsig entry, and considering the rest of the key part of the commit message. Per the git docs[1] the gpgsig header is used to give the GPG signature of a signed commit, with continuation lines denoted by a leading space: tree eebfed94e75e7760540d1485c740902590a00332 parent 04b871796dc0420f8e7561a895b52484b701d51a author A U Thor 1465981137 +0000 committer C O Mitter 1465981137 +0000 gpgsig -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 $ iQEcBAABAgAGBQJXYRjRAAoJEGEJLoW3InGJ3IwIAIY4SA6GxY3BjL60YyvsJPh/ HRCJwH+w7wt3Yc/9/bW2F+gF72kdHOOs2jfv+OZhq0q4OAN6fvVSczISY/82LpS7 DVdMQj2/YcHDT4xrDNBnXnviDO9G7am/9OE77kEbXrp7QPxvhjkicHNwy2rEflAA zn075rtEERDHr8nRYiDh8eVrefSO7D+bdQ7gv+7GsYMsd2auJWi1dHOSfTr9HIF4 HJhWXT9d2f8W+diRYXGh4X0wYiGg6na/soXc+vdtDYBzIxanRqjg8jCAeo1eOTk1 EdTwhcTZlI0x5pvJ3H0+4hA2jtldVtmPM4OTB0cTrEWBad7XV6YgiyuII73Ve3I= =jKHM -----END PGP SIGNATURE----- signed commit signed commit message body This commit adds a test and a special parsing case for the gpgsig header to accommodate this. [1] https://git-scm.com/docs/gitformat-signature#_commit_signatures Signed-off-by: Simon Coffey --- lib/git/lib.rb | 6 ++++ tests/units/test_signed_commits.rb | 58 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 tests/units/test_signed_commits.rb diff --git a/lib/git/lib.rb b/lib/git/lib.rb index b4d16152..7b2fe85e 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -238,6 +238,12 @@ def process_commit_data(data, sha = nil, indent = 4) if key == 'parent' hsh['parent'] << value.join(' ') + elsif key == 'gpgsig' + hsh['gpgsig'] = value.join(' ').lstrip + + while data.first.start_with?(' ') + hsh['gpgsig'] += "\n" + data.shift.lstrip + end else hsh[key] = value.join(' ') end diff --git a/tests/units/test_signed_commits.rb b/tests/units/test_signed_commits.rb new file mode 100644 index 00000000..578b082c --- /dev/null +++ b/tests/units/test_signed_commits.rb @@ -0,0 +1,58 @@ +#!/usr/bin/env ruby + +require File.dirname(__FILE__) + '/../test_helper' +require "fileutils" + +class TestSignedCommits < Test::Unit::TestCase + def git_working_dir + cwd = FileUtils.pwd + if File.directory?(File.join(cwd, 'files')) + test_dir = File.join(cwd, 'files') + elsif File.directory?(File.join(cwd, '..', 'files')) + test_dir = File.join(cwd, '..', 'files') + elsif File.directory?(File.join(cwd, 'tests', 'files')) + test_dir = File.join(cwd, 'tests', 'files') + end + + create_temp_repo(File.expand_path(File.join(test_dir, 'signed_commits'))) + end + + def create_temp_repo(clone_path) + filename = 'git_test' + Time.now.to_i.to_s + rand(300).to_s.rjust(3, '0') + @tmp_path = File.join("/tmp/", filename) + FileUtils.mkdir_p(@tmp_path) + FileUtils.cp_r(clone_path, @tmp_path) + tmp_path = File.join(@tmp_path, File.basename(clone_path)) + Dir.chdir(tmp_path) do + FileUtils.mv('dot_git', '.git') + end + tmp_path + end + + def setup + @lib = Git.open(git_working_dir).lib + end + + def test_commit_data + data = @lib.commit_data('a043c677c93d9f2b') + + assert_equal('Simon Coffey 1673868871 +0000', data['author']) + assert_equal('92fd5b7c1aeb6a4e2602799f01608b3deebbad2d', data['tree']) + assert_equal(<<~EOS.chomp, data['gpgsig']) + -----BEGIN PGP SIGNATURE----- + + iHUEABYKAB0WIQRmiEtd91BkbBpcgV2yCJ+VnJz/iQUCY8U2cgAKCRCyCJ+VnJz/ + ibjyAP48dGdoFgWL2BjV3CnmebdVjEjTCQtF2QGUybJsyJhhcwEAwbzAAGt3YHfS + uuLNH9ki9Sqd+/CH+L8Q2dPM5F4l3gg= + =3ATn + -----END PGP SIGNATURE----- + EOS + assert_equal(<<~EOS, data['message']) + Signed commit + + This will allow me to test commit data extraction for signed commits. + I'm making the message multiline to make sure that message extraction is + preserved. + EOS + end +end From 2723cd33505893dcb1c9ed8fcb2d7a4251b6e070 Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 17:15:28 +0000 Subject: [PATCH 3/8] Extract commit header parsing In the previous commit I introduced a new case for git commit header parsing to cover the gpgsig header, which supports multi-line values. I think the #process_commit_data method is getting a bit unwieldy, so this commit extracts the generic header parsing, separating it from the special-case handling of parent (which is not multi-line, but can have multiple entries and thus multiple values). By switching to a regex key/value extraction approach we're also able to avoid splitting the entire string before re-joining the value. Signed-off-by: Simon Coffey --- lib/git/lib.rb | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 7b2fe85e..a68d16dc 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -231,21 +231,11 @@ def process_commit_data(data, sha = nil, indent = 4) 'parent' => [] } - loop do - key, *value = data.shift.split - - break if key.nil? - + each_cat_file_header(data) do |key, value| if key == 'parent' - hsh['parent'] << value.join(' ') - elsif key == 'gpgsig' - hsh['gpgsig'] = value.join(' ').lstrip - - while data.first.start_with?(' ') - hsh['gpgsig'] += "\n" + data.shift.lstrip - end + hsh['parent'] << value else - hsh[key] = value.join(' ') + hsh[key] = value end end @@ -254,6 +244,21 @@ def process_commit_data(data, sha = nil, indent = 4) return hsh end + CAT_FILE_HEADER_LINE = /\A(?\w+) (?.*)\z/ + + def each_cat_file_header(data) + while (match = CAT_FILE_HEADER_LINE.match(data.shift)) + key = match[:key] + value_lines = [match[:value]] + + while data.first.start_with?(' ') + value_lines << data.shift.lstrip + end + + yield key, value_lines.join("\n") + end + end + def tag_data(name) sha = sha.to_s tdata = command_lines('cat-file', 'tag', name) From cc12203152993682fac72f29606aff90a86e5687 Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Tue, 17 Jan 2023 11:05:58 +0000 Subject: [PATCH 4/8] Use cat-file header parsing for Git::Lib#tag_data The general format of `git cat-file tag` output is identical to that of `git cat-file commit`, so we can use the generic parsing helper. Signed-off-by: Simon Coffey --- lib/git/lib.rb | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index a68d16dc..74b744a6 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -271,12 +271,8 @@ def process_tag_data(data, name, indent=4) 'message' => '' } - loop do - key, *value = data.shift.split - - break if key.nil? - - hsh[key] = value.join(' ') + each_cat_file_header(data) do |key, value| + hsh[key] = value end hsh['message'] = data.collect {|line| line[indent..-1]}.join("\n") + "\n" From 2952cedea8ed539f9d0cf35c460b9a2f4e4b9f26 Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 18:14:23 +0000 Subject: [PATCH 5/8] Remove unnecessary default tag/commit message values These methods always explicitly set the 'message' key of the output hash based on the `git cat-file` output, so we don't need a default in the starting hash. Signed-off-by: Simon Coffey --- lib/git/lib.rb | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 74b744a6..eb6e3764 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -226,9 +226,8 @@ def commit_data(sha) def process_commit_data(data, sha = nil, indent = 4) hsh = { - 'sha' => sha, - 'message' => '', - 'parent' => [] + 'sha' => sha, + 'parent' => [] } each_cat_file_header(data) do |key, value| @@ -266,10 +265,7 @@ def tag_data(name) end def process_tag_data(data, name, indent=4) - hsh = { - 'name' => name, - 'message' => '' - } + hsh = { 'name' => name } each_cat_file_header(data) do |key, value| hsh[key] = value From 59dd023f5c8d9c412eca72878657fdeb86cf15db Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 16:03:27 +0000 Subject: [PATCH 6/8] Remove #process_commmit_data indent parameter This was introduced in [1] to handle variance in indentation when parsing commit messages; at the time the #process_commit_data method had two callers, one which used it to process log lines, and the other which used it to process the output of `git cat-file commit `. The former sees a 4-space indent, the latter, zero. Since [2], however, the log processing has been handled by the #process_commit_log_data method, so #process_commit_data exclusively handles un-indented inputs, and we can remove the indent parameter. [1] https://github.com/ruby-git/ruby-git/commit/05117d4ebc1ba903083802f703739c55207fdfd5 [2] https://github.com/ruby-git/ruby-git/commit/97e1fff9edf0ee8cc7b49e49bd7faa4b1de16785 Signed-off-by: Simon Coffey --- lib/git/lib.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index eb6e3764..0886df4e 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -221,10 +221,10 @@ def object_size(sha) def commit_data(sha) sha = sha.to_s cdata = command_lines('cat-file', 'commit', sha) - process_commit_data(cdata, sha, 0) + process_commit_data(cdata, sha) end - def process_commit_data(data, sha = nil, indent = 4) + def process_commit_data(data, sha = nil) hsh = { 'sha' => sha, 'parent' => [] @@ -238,7 +238,7 @@ def process_commit_data(data, sha = nil, indent = 4) end end - hsh['message'] = data.collect {|line| line[indent..-1]}.join("\n") + "\n" + hsh['message'] = data.join("\n") + "\n" return hsh end From 7924fc9496241b27d08971a7de45d9880e89d56e Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 16:10:51 +0000 Subject: [PATCH 7/8] Remove #process_commit_data default sha value As with the previous commit, this default value was relevant when this method was also used to process git log output. It no longer is, and its only caller passes the sha value, so we can remove the default. Signed-off-by: Simon Coffey --- lib/git/lib.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 0886df4e..6fd1a1f2 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -224,7 +224,7 @@ def commit_data(sha) process_commit_data(cdata, sha) end - def process_commit_data(data, sha = nil) + def process_commit_data(data, sha) hsh = { 'sha' => sha, 'parent' => [] From 392cdf55c53fddf05c45af2d5f1bbf10d6a2c16b Mon Sep 17 00:00:00 2001 From: Simon Coffey Date: Mon, 16 Jan 2023 17:22:12 +0000 Subject: [PATCH 8/8] Remove #process_tag_data indent parameter As with the previous commits, this indent parameter appears to be a relic of old usage; the #process_tag_data method has a single caller that explicitly sets the indent to zero, so the indent parameter and associated handling can just be removed. Signed-off-by: Simon Coffey --- lib/git/lib.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/git/lib.rb b/lib/git/lib.rb index 6fd1a1f2..35791c02 100644 --- a/lib/git/lib.rb +++ b/lib/git/lib.rb @@ -261,17 +261,17 @@ def each_cat_file_header(data) def tag_data(name) sha = sha.to_s tdata = command_lines('cat-file', 'tag', name) - process_tag_data(tdata, name, 0) + process_tag_data(tdata, name) end - def process_tag_data(data, name, indent=4) + def process_tag_data(data, name) hsh = { 'name' => name } each_cat_file_header(data) do |key, value| hsh[key] = value end - hsh['message'] = data.collect {|line| line[indent..-1]}.join("\n") + "\n" + hsh['message'] = data.join("\n") + "\n" return hsh end