From 9525ac822aa902f5325f17e7b08ffb60b683e0e7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Jun 2023 07:52:51 +0200 Subject: [PATCH 01/16] thanks clippy --- gitoxide-core/src/repository/attributes/validate_baseline.rs | 2 +- gix-actor/tests/identity/mod.rs | 2 +- gix-actor/tests/signature/mod.rs | 2 +- gix-features/tests/pipe.rs | 2 +- gix-pack/src/cache/delta/traverse/resolve.rs | 5 ++--- gix-transport/tests/client/blocking_io/http/mod.rs | 2 +- gix/tests/repository/remote.rs | 4 ++-- gix/tests/repository/worktree.rs | 2 +- 8 files changed, 10 insertions(+), 11 deletions(-) diff --git a/gitoxide-core/src/repository/attributes/validate_baseline.rs b/gitoxide-core/src/repository/attributes/validate_baseline.rs index b3133ac8b6..05c79d5a56 100644 --- a/gitoxide-core/src/repository/attributes/validate_baseline.rs +++ b/gitoxide-core/src/repository/attributes/validate_baseline.rs @@ -100,7 +100,7 @@ pub(crate) mod function { }); let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it")); - let mut lines = stdout.lines().filter_map(Result::ok).peekable(); + let mut lines = stdout.lines().map_while(Result::ok).peekable(); while let Some(baseline) = parse_attributes(&mut lines) { if tx_base.send(baseline).is_err() { child.kill().ok(); diff --git a/gix-actor/tests/identity/mod.rs b/gix-actor/tests/identity/mod.rs index c94f7dc8ab..2085e1fc54 100644 --- a/gix-actor/tests/identity/mod.rs +++ b/gix-actor/tests/identity/mod.rs @@ -3,7 +3,7 @@ use gix_actor::Identity; #[test] fn round_trip() -> gix_testtools::Result { - static DEFAULTS: &'static [&'static [u8]] = &[ + static DEFAULTS: &[&[u8]] = &[ b"Sebastian Thiel ", ".. ☺️Sebastian 王知明 Thiel🙌 .. ".as_bytes(), b".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere " diff --git a/gix-actor/tests/signature/mod.rs b/gix-actor/tests/signature/mod.rs index b9902de1b4..6bb0da5868 100644 --- a/gix-actor/tests/signature/mod.rs +++ b/gix-actor/tests/signature/mod.rs @@ -64,7 +64,7 @@ fn trim() { #[test] fn round_trip() -> Result<(), Box> { - static DEFAULTS: &'static [&'static [u8]] = &[ + static DEFAULTS: &[&[u8]] = &[ b"Sebastian Thiel 1 -0030", ".. ☺️Sebastian 王知明 Thiel🙌 .. 1528473343 +0230".as_bytes(), b".. 
whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere 1528473343 +0230" diff --git a/gix-features/tests/pipe.rs b/gix-features/tests/pipe.rs index 4815000bf9..f2055ea9e3 100644 --- a/gix-features/tests/pipe.rs +++ b/gix-features/tests/pipe.rs @@ -49,7 +49,7 @@ mod io { writer.write_all(b"b\nc\n").expect("success"); drop(writer); assert_eq!( - reader.lines().flat_map(Result::ok).collect::>(), + reader.lines().map_while(Result::ok).collect::>(), vec!["a", "b", "c"] ) } diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index 5b9b1f4bbc..73325050b0 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -413,11 +413,10 @@ fn decompress_all_at_once_with(b: &[u8], decompressed_len: usize, out: &mut Vec< INFLATE.with(|inflate| { let mut inflate = inflate.borrow_mut(); inflate.reset(); - let res = inflate.once(b, out).map_err(|err| Error::ZlibInflate { + inflate.once(b, out).map_err(|err| Error::ZlibInflate { source: err, message: "Failed to decompress entry", - }); - res + }) })?; Ok(()) } diff --git a/gix-transport/tests/client/blocking_io/http/mod.rs b/gix-transport/tests/client/blocking_io/http/mod.rs index 5724ca8d2d..5a9418eef2 100644 --- a/gix-transport/tests/client/blocking_io/http/mod.rs +++ b/gix-transport/tests/client/blocking_io/http/mod.rs @@ -269,7 +269,7 @@ fn handshake_v1() -> crate::Result { let refs = refs .expect("v1 protocol provides refs") .lines() - .flat_map(Result::ok) + .map_while(Result::ok) .collect::>(); assert_eq!( refs, diff --git a/gix/tests/repository/remote.rs b/gix/tests/repository/remote.rs index 044ceeb0b4..ae31fd8f73 100644 --- a/gix/tests/repository/remote.rs +++ b/gix/tests/repository/remote.rs @@ -192,7 +192,7 @@ mod find_remote { let repo = remote::repo("url-rewriting"); let baseline = std::fs::read(repo.git_dir().join("baseline.git"))?; - let mut baseline = baseline.lines().filter_map(Result::ok); + let mut baseline = baseline.lines().map_while(Result::ok); let expected_fetch_url: BString = baseline.next().expect("fetch").into(); let expected_push_url: BString = baseline.next().expect("push").into(); @@ -223,7 +223,7 @@ mod find_remote { let repo = remote::repo("bad-url-rewriting"); let baseline = std::fs::read(repo.git_dir().join("baseline.git"))?; - let mut baseline = baseline.lines().filter_map(Result::ok); + let mut baseline = baseline.lines().map_while(Result::ok); let expected_fetch_url: BString = baseline.next().expect("fetch").into(); let expected_push_url: BString = baseline.next().expect("push").into(); assert_eq!( diff --git a/gix/tests/repository/worktree.rs b/gix/tests/repository/worktree.rs index 6679fe9e7f..6516614765 100644 --- a/gix/tests/repository/worktree.rs +++ b/gix/tests/repository/worktree.rs @@ -105,7 +105,7 @@ mod with_core_worktree_config { std::fs::read(git_dir.join("status.baseline")) .unwrap() .lines() - .filter_map(Result::ok) + .map_while(Result::ok) .filter(|line| line.contains(" D ")) .count() } From 3c1bfd586c83cc7678fb2a2d78a685b80ba33284 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Jun 2023 08:40:14 +0200 Subject: [PATCH 02/16] fix journey test (remove usage of `tree`) It seems homebrew `tree` keeps messing up directory counts, which may make it incompatible with CI and generaly makes it impossible to use in a cross-platform manner. Thus it was replaced with `find`. 
--- tests/journey/ein.sh | 6 +- tests/journey/gix.sh | 8 +- ...s-dir-skip-checks-success-tree-miniz-oxide | 80 ++++++---------- ...r-skip-checks-success-tree-miniz-oxide-max | 80 ++++++---------- ...jects-dir-skip-checks-success-tree-zlib-ng | 83 ++++++----------- .../explode/with-objects-dir-success-tree | 91 ++++++------------- .../directory-structure-after-organize | 22 ++--- .../tool/organize/initial-directory-structure | 26 +++--- 8 files changed, 141 insertions(+), 255 deletions(-) diff --git a/tests/journey/ein.sh b/tests/journey/ein.sh index 3c67d8c687..27d1caf2fe 100644 --- a/tests/journey/ein.sh +++ b/tests/journey/ein.sh @@ -112,7 +112,7 @@ title "Porcelain ${kind}" it "does not change the directory structure at all" && { WITH_SNAPSHOT="$snapshot/initial-directory-structure" \ - expect_run $SUCCESSFULLY tree -L 2 + expect_run_sh $SUCCESSFULLY 'find . -maxdepth 2 | sort' } ) @@ -124,7 +124,7 @@ title "Porcelain ${kind}" it "changes the directory structure" && { WITH_SNAPSHOT="$snapshot/directory-structure-after-organize" \ - expect_run $SUCCESSFULLY tree -L 2 + expect_run_sh $SUCCESSFULLY 'find . -maxdepth 2 | sort' } ) @@ -136,7 +136,7 @@ title "Porcelain ${kind}" it "does not alter the directory structure as these are already in place" && { WITH_SNAPSHOT="$snapshot/directory-structure-after-organize" \ - expect_run $SUCCESSFULLY tree -L 2 + expect_run_sh $SUCCESSFULLY 'find . -maxdepth 2 | sort' } ) ) diff --git a/tests/journey/gix.sh b/tests/journey/gix.sh index 24902512d2..50a4b3efde 100644 --- a/tests/journey/gix.sh +++ b/tests/journey/gix.sh @@ -462,7 +462,7 @@ title "gix attributes" expect_run $WITH_FAILURE test -e "${PACK_FILE}".idx } - (with_program tree + (with_program find if test "$kind" = "small" ; then suffix=miniz-oxide @@ -473,7 +473,7 @@ title "gix attributes" fi it "creates all pack objects, but the broken ones" && { WITH_SNAPSHOT="$snapshot/broken-with-objects-dir-skip-checks-success-tree-$suffix" \ - expect_run $SUCCESSFULLY tree + expect_run_sh $SUCCESSFULLY 'find . -type f | sort' } ) ) @@ -495,10 +495,10 @@ title "gix attributes" "${PACK_FILE}.pack" . } - (with_program tree + (with_program find it "creates all pack objects" && { WITH_SNAPSHOT="$snapshot/with-objects-dir-success-tree" \ - expect_run $SUCCESSFULLY tree + expect_run_sh $SUCCESSFULLY 'find . -type f | sort' } ) ) diff --git a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide index 44c9b82024..b7677b2a55 100644 --- a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide +++ b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide @@ -1,54 +1,26 @@ -. 
-├── 0e -│   └── ad45fc727edcf5cadca25ef922284f32bb6fc1 -├── 15 -│   └── 926d8d6d17d1cbdf7f03c457e8ff983270f363 -├── 18 -│   └── bd3fc20b0565f94bce0a3e94b6a83b26b88627 -├── 1d -│   └── fd336d2290794b0b1f80d98af33f725da6f42d -├── 2b -│   └── 621c1a3aac23b8258885a9b4658d9ac993742f -├── 2c -│   └── 1e59ee54facb7d72c0061d06b9fe3889f357a9 -├── 2d -│   └── ad8b277db3a95919bd904133d7e7cc3e323cb9 -├── 3a -│   └── b660ad62dd7c8c8bd637aa9bc1c2843a8439fe -├── 3d -│   └── 650a1c41a4529863818fd613b95e83668bbfc1 -├── 41 -│   └── 97ce3c6d943759e1088a0298b64571b4bc725a -├── 50 -│   └── 1b297447a8255d3533c6858bb692575cdefaa0 -├── 5d -│   └── e2eda652f29103c0d160f8c05d7e83b653a157 -├── 66 -│   └── 74d310d179400358d581f9725cbd4a2c32e3bf -├── 68 -│   └── b95733c796b12571fb1f656062a15a78e7dcf4 -├── 83 -│   └── d9602eccfc733a550812ce492d4caa0af625c8 -├── 84 -│   ├── 26f672fc65239135b1f1580bb79ecb16fd05f0 -│   └── 81dbefa2fb9398a673fe1f48dc480c1f558890 -├── 85 -│   └── 48234cfc7b4f0c9475d24d4c386783533a8034 -├── 88 -│   └── 58983d81b0eef76eb55d21a0d96b7b16846eca -├── af -│   └── 4f6405296dec699321ca59d48583ffa0323b0e -├── b2 -│   └── 025146d0718d953036352f8435cfa392b1d799 -├── bb -│   └── a287531b3a845faa032a8fef3e6d70d185c89b -├── bd -│   └── 91890c62d85ec16aadd3fb991b3ad7a365adde -├── cb -│   └── 572206d9dac4ba52878e7e1a4a7028d85707ab -├── e2 -│   └── 34c232ce0b8acef3f43fa34c036e68522b5612 -└── e8 - └── 00b9c207e17f9b11e321cc1fba5dfe08af4222 - -25 directories, 26 files \ No newline at end of file +./0e/ad45fc727edcf5cadca25ef922284f32bb6fc1 +./15/926d8d6d17d1cbdf7f03c457e8ff983270f363 +./18/bd3fc20b0565f94bce0a3e94b6a83b26b88627 +./1d/fd336d2290794b0b1f80d98af33f725da6f42d +./2b/621c1a3aac23b8258885a9b4658d9ac993742f +./2c/1e59ee54facb7d72c0061d06b9fe3889f357a9 +./2d/ad8b277db3a95919bd904133d7e7cc3e323cb9 +./3a/b660ad62dd7c8c8bd637aa9bc1c2843a8439fe +./3d/650a1c41a4529863818fd613b95e83668bbfc1 +./41/97ce3c6d943759e1088a0298b64571b4bc725a +./50/1b297447a8255d3533c6858bb692575cdefaa0 +./5d/e2eda652f29103c0d160f8c05d7e83b653a157 +./66/74d310d179400358d581f9725cbd4a2c32e3bf +./68/b95733c796b12571fb1f656062a15a78e7dcf4 +./83/d9602eccfc733a550812ce492d4caa0af625c8 +./84/26f672fc65239135b1f1580bb79ecb16fd05f0 +./84/81dbefa2fb9398a673fe1f48dc480c1f558890 +./85/48234cfc7b4f0c9475d24d4c386783533a8034 +./88/58983d81b0eef76eb55d21a0d96b7b16846eca +./af/4f6405296dec699321ca59d48583ffa0323b0e +./b2/025146d0718d953036352f8435cfa392b1d799 +./bb/a287531b3a845faa032a8fef3e6d70d185c89b +./bd/91890c62d85ec16aadd3fb991b3ad7a365adde +./cb/572206d9dac4ba52878e7e1a4a7028d85707ab +./e2/34c232ce0b8acef3f43fa34c036e68522b5612 +./e8/00b9c207e17f9b11e321cc1fba5dfe08af4222 \ No newline at end of file diff --git a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide-max b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide-max index 44c9b82024..b7677b2a55 100644 --- a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide-max +++ b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-miniz-oxide-max @@ -1,54 +1,26 @@ -. 
-├── 0e -│   └── ad45fc727edcf5cadca25ef922284f32bb6fc1 -├── 15 -│   └── 926d8d6d17d1cbdf7f03c457e8ff983270f363 -├── 18 -│   └── bd3fc20b0565f94bce0a3e94b6a83b26b88627 -├── 1d -│   └── fd336d2290794b0b1f80d98af33f725da6f42d -├── 2b -│   └── 621c1a3aac23b8258885a9b4658d9ac993742f -├── 2c -│   └── 1e59ee54facb7d72c0061d06b9fe3889f357a9 -├── 2d -│   └── ad8b277db3a95919bd904133d7e7cc3e323cb9 -├── 3a -│   └── b660ad62dd7c8c8bd637aa9bc1c2843a8439fe -├── 3d -│   └── 650a1c41a4529863818fd613b95e83668bbfc1 -├── 41 -│   └── 97ce3c6d943759e1088a0298b64571b4bc725a -├── 50 -│   └── 1b297447a8255d3533c6858bb692575cdefaa0 -├── 5d -│   └── e2eda652f29103c0d160f8c05d7e83b653a157 -├── 66 -│   └── 74d310d179400358d581f9725cbd4a2c32e3bf -├── 68 -│   └── b95733c796b12571fb1f656062a15a78e7dcf4 -├── 83 -│   └── d9602eccfc733a550812ce492d4caa0af625c8 -├── 84 -│   ├── 26f672fc65239135b1f1580bb79ecb16fd05f0 -│   └── 81dbefa2fb9398a673fe1f48dc480c1f558890 -├── 85 -│   └── 48234cfc7b4f0c9475d24d4c386783533a8034 -├── 88 -│   └── 58983d81b0eef76eb55d21a0d96b7b16846eca -├── af -│   └── 4f6405296dec699321ca59d48583ffa0323b0e -├── b2 -│   └── 025146d0718d953036352f8435cfa392b1d799 -├── bb -│   └── a287531b3a845faa032a8fef3e6d70d185c89b -├── bd -│   └── 91890c62d85ec16aadd3fb991b3ad7a365adde -├── cb -│   └── 572206d9dac4ba52878e7e1a4a7028d85707ab -├── e2 -│   └── 34c232ce0b8acef3f43fa34c036e68522b5612 -└── e8 - └── 00b9c207e17f9b11e321cc1fba5dfe08af4222 - -25 directories, 26 files \ No newline at end of file +./0e/ad45fc727edcf5cadca25ef922284f32bb6fc1 +./15/926d8d6d17d1cbdf7f03c457e8ff983270f363 +./18/bd3fc20b0565f94bce0a3e94b6a83b26b88627 +./1d/fd336d2290794b0b1f80d98af33f725da6f42d +./2b/621c1a3aac23b8258885a9b4658d9ac993742f +./2c/1e59ee54facb7d72c0061d06b9fe3889f357a9 +./2d/ad8b277db3a95919bd904133d7e7cc3e323cb9 +./3a/b660ad62dd7c8c8bd637aa9bc1c2843a8439fe +./3d/650a1c41a4529863818fd613b95e83668bbfc1 +./41/97ce3c6d943759e1088a0298b64571b4bc725a +./50/1b297447a8255d3533c6858bb692575cdefaa0 +./5d/e2eda652f29103c0d160f8c05d7e83b653a157 +./66/74d310d179400358d581f9725cbd4a2c32e3bf +./68/b95733c796b12571fb1f656062a15a78e7dcf4 +./83/d9602eccfc733a550812ce492d4caa0af625c8 +./84/26f672fc65239135b1f1580bb79ecb16fd05f0 +./84/81dbefa2fb9398a673fe1f48dc480c1f558890 +./85/48234cfc7b4f0c9475d24d4c386783533a8034 +./88/58983d81b0eef76eb55d21a0d96b7b16846eca +./af/4f6405296dec699321ca59d48583ffa0323b0e +./b2/025146d0718d953036352f8435cfa392b1d799 +./bb/a287531b3a845faa032a8fef3e6d70d185c89b +./bd/91890c62d85ec16aadd3fb991b3ad7a365adde +./cb/572206d9dac4ba52878e7e1a4a7028d85707ab +./e2/34c232ce0b8acef3f43fa34c036e68522b5612 +./e8/00b9c207e17f9b11e321cc1fba5dfe08af4222 \ No newline at end of file diff --git a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-zlib-ng b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-zlib-ng index 3b1ec68144..da10cdb1f2 100644 --- a/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-zlib-ng +++ b/tests/snapshots/plumbing/no-repo/pack/explode/broken-with-objects-dir-skip-checks-success-tree-zlib-ng @@ -1,56 +1,27 @@ -. 
-├── 0e -│   └── ad45fc727edcf5cadca25ef922284f32bb6fc1 -├── 15 -│   └── 926d8d6d17d1cbdf7f03c457e8ff983270f363 -├── 18 -│   └── bd3fc20b0565f94bce0a3e94b6a83b26b88627 -├── 1d -│   └── fd336d2290794b0b1f80d98af33f725da6f42d -├── 2b -│   └── 621c1a3aac23b8258885a9b4658d9ac993742f -├── 2c -│   └── 1e59ee54facb7d72c0061d06b9fe3889f357a9 -├── 2d -│   └── ad8b277db3a95919bd904133d7e7cc3e323cb9 -├── 3a -│   └── b660ad62dd7c8c8bd637aa9bc1c2843a8439fe -├── 3d -│   └── 650a1c41a4529863818fd613b95e83668bbfc1 -├── 41 -│   └── 97ce3c6d943759e1088a0298b64571b4bc725a -├── 50 -│   └── 1b297447a8255d3533c6858bb692575cdefaa0 -├── 5d -│   └── e2eda652f29103c0d160f8c05d7e83b653a157 -├── 66 -│   └── 74d310d179400358d581f9725cbd4a2c32e3bf -├── 68 -│   └── b95733c796b12571fb1f656062a15a78e7dcf4 -├── 83 -│   └── d9602eccfc733a550812ce492d4caa0af625c8 -├── 84 -│   ├── 26f672fc65239135b1f1580bb79ecb16fd05f0 -│   └── 81dbefa2fb9398a673fe1f48dc480c1f558890 -├── 85 -│   └── 48234cfc7b4f0c9475d24d4c386783533a8034 -├── 88 -│   └── 58983d81b0eef76eb55d21a0d96b7b16846eca -├── a2 -│   └── 9ebd0e0fcbcd2a0842dd44cc7c22a90a310a3a -├── af -│   └── 4f6405296dec699321ca59d48583ffa0323b0e -├── b2 -│   └── 025146d0718d953036352f8435cfa392b1d799 -├── bb -│   └── a287531b3a845faa032a8fef3e6d70d185c89b -├── bd -│   └── 91890c62d85ec16aadd3fb991b3ad7a365adde -├── cb -│   └── 572206d9dac4ba52878e7e1a4a7028d85707ab -├── e2 -│   └── 34c232ce0b8acef3f43fa34c036e68522b5612 -└── e8 - └── 00b9c207e17f9b11e321cc1fba5dfe08af4222 - -26 directories, 27 files \ No newline at end of file +./0e/ad45fc727edcf5cadca25ef922284f32bb6fc1 +./15/926d8d6d17d1cbdf7f03c457e8ff983270f363 +./18/bd3fc20b0565f94bce0a3e94b6a83b26b88627 +./1d/fd336d2290794b0b1f80d98af33f725da6f42d +./2b/621c1a3aac23b8258885a9b4658d9ac993742f +./2c/1e59ee54facb7d72c0061d06b9fe3889f357a9 +./2d/ad8b277db3a95919bd904133d7e7cc3e323cb9 +./3a/b660ad62dd7c8c8bd637aa9bc1c2843a8439fe +./3d/650a1c41a4529863818fd613b95e83668bbfc1 +./41/97ce3c6d943759e1088a0298b64571b4bc725a +./50/1b297447a8255d3533c6858bb692575cdefaa0 +./5d/e2eda652f29103c0d160f8c05d7e83b653a157 +./66/74d310d179400358d581f9725cbd4a2c32e3bf +./68/b95733c796b12571fb1f656062a15a78e7dcf4 +./83/d9602eccfc733a550812ce492d4caa0af625c8 +./84/26f672fc65239135b1f1580bb79ecb16fd05f0 +./84/81dbefa2fb9398a673fe1f48dc480c1f558890 +./85/48234cfc7b4f0c9475d24d4c386783533a8034 +./88/58983d81b0eef76eb55d21a0d96b7b16846eca +./a2/9ebd0e0fcbcd2a0842dd44cc7c22a90a310a3a +./af/4f6405296dec699321ca59d48583ffa0323b0e +./b2/025146d0718d953036352f8435cfa392b1d799 +./bb/a287531b3a845faa032a8fef3e6d70d185c89b +./bd/91890c62d85ec16aadd3fb991b3ad7a365adde +./cb/572206d9dac4ba52878e7e1a4a7028d85707ab +./e2/34c232ce0b8acef3f43fa34c036e68522b5612 +./e8/00b9c207e17f9b11e321cc1fba5dfe08af4222 \ No newline at end of file diff --git a/tests/snapshots/plumbing/no-repo/pack/explode/with-objects-dir-success-tree b/tests/snapshots/plumbing/no-repo/pack/explode/with-objects-dir-success-tree index 3632ea6c75..d344805e5e 100644 --- a/tests/snapshots/plumbing/no-repo/pack/explode/with-objects-dir-success-tree +++ b/tests/snapshots/plumbing/no-repo/pack/explode/with-objects-dir-success-tree @@ -1,61 +1,30 @@ -. 
-├── 0e -│   └── ad45fc727edcf5cadca25ef922284f32bb6fc1 -├── 15 -│   └── 926d8d6d17d1cbdf7f03c457e8ff983270f363 -├── 18 -│   └── bd3fc20b0565f94bce0a3e94b6a83b26b88627 -├── 1a -│   └── 480b442042edd4a6bacae41bf4113727e7a130 -├── 1d -│   └── fd336d2290794b0b1f80d98af33f725da6f42d -├── 2b -│   └── 621c1a3aac23b8258885a9b4658d9ac993742f -├── 2c -│   └── 1e59ee54facb7d72c0061d06b9fe3889f357a9 -├── 2d -│   └── ad8b277db3a95919bd904133d7e7cc3e323cb9 -├── 3a -│   └── b660ad62dd7c8c8bd637aa9bc1c2843a8439fe -├── 3d -│   └── 650a1c41a4529863818fd613b95e83668bbfc1 -├── 41 -│   └── 97ce3c6d943759e1088a0298b64571b4bc725a -├── 4c -│   ├── 35f641dbedaed230b5588fdc106c4538b4d09b -│   └── 97a057e41159f9767cf8704ed5ae181adf4d8d -├── 50 -│   └── 1b297447a8255d3533c6858bb692575cdefaa0 -├── 5d -│   └── e2eda652f29103c0d160f8c05d7e83b653a157 -├── 66 -│   └── 74d310d179400358d581f9725cbd4a2c32e3bf -├── 68 -│   └── b95733c796b12571fb1f656062a15a78e7dcf4 -├── 83 -│   └── d9602eccfc733a550812ce492d4caa0af625c8 -├── 84 -│   ├── 26f672fc65239135b1f1580bb79ecb16fd05f0 -│   └── 81dbefa2fb9398a673fe1f48dc480c1f558890 -├── 85 -│   └── 48234cfc7b4f0c9475d24d4c386783533a8034 -├── 88 -│   └── 58983d81b0eef76eb55d21a0d96b7b16846eca -├── ac -│   └── f86bca46d2b53d19a5a382e10def38d3e224da -├── af -│   └── 4f6405296dec699321ca59d48583ffa0323b0e -├── b2 -│   └── 025146d0718d953036352f8435cfa392b1d799 -├── bb -│   └── a287531b3a845faa032a8fef3e6d70d185c89b -├── bd -│   └── 91890c62d85ec16aadd3fb991b3ad7a365adde -├── cb -│   └── 572206d9dac4ba52878e7e1a4a7028d85707ab -├── e2 -│   └── 34c232ce0b8acef3f43fa34c036e68522b5612 -└── e8 - └── 00b9c207e17f9b11e321cc1fba5dfe08af4222 - -28 directories, 30 files \ No newline at end of file +./0e/ad45fc727edcf5cadca25ef922284f32bb6fc1 +./15/926d8d6d17d1cbdf7f03c457e8ff983270f363 +./18/bd3fc20b0565f94bce0a3e94b6a83b26b88627 +./1a/480b442042edd4a6bacae41bf4113727e7a130 +./1d/fd336d2290794b0b1f80d98af33f725da6f42d +./2b/621c1a3aac23b8258885a9b4658d9ac993742f +./2c/1e59ee54facb7d72c0061d06b9fe3889f357a9 +./2d/ad8b277db3a95919bd904133d7e7cc3e323cb9 +./3a/b660ad62dd7c8c8bd637aa9bc1c2843a8439fe +./3d/650a1c41a4529863818fd613b95e83668bbfc1 +./41/97ce3c6d943759e1088a0298b64571b4bc725a +./4c/35f641dbedaed230b5588fdc106c4538b4d09b +./4c/97a057e41159f9767cf8704ed5ae181adf4d8d +./50/1b297447a8255d3533c6858bb692575cdefaa0 +./5d/e2eda652f29103c0d160f8c05d7e83b653a157 +./66/74d310d179400358d581f9725cbd4a2c32e3bf +./68/b95733c796b12571fb1f656062a15a78e7dcf4 +./83/d9602eccfc733a550812ce492d4caa0af625c8 +./84/26f672fc65239135b1f1580bb79ecb16fd05f0 +./84/81dbefa2fb9398a673fe1f48dc480c1f558890 +./85/48234cfc7b4f0c9475d24d4c386783533a8034 +./88/58983d81b0eef76eb55d21a0d96b7b16846eca +./ac/f86bca46d2b53d19a5a382e10def38d3e224da +./af/4f6405296dec699321ca59d48583ffa0323b0e +./b2/025146d0718d953036352f8435cfa392b1d799 +./bb/a287531b3a845faa032a8fef3e6d70d185c89b +./bd/91890c62d85ec16aadd3fb991b3ad7a365adde +./cb/572206d9dac4ba52878e7e1a4a7028d85707ab +./e2/34c232ce0b8acef3f43fa34c036e68522b5612 +./e8/00b9c207e17f9b11e321cc1fba5dfe08af4222 \ No newline at end of file diff --git a/tests/snapshots/porcelain/tool/organize/directory-structure-after-organize b/tests/snapshots/porcelain/tool/organize/directory-structure-after-organize index a3bde92677..5ba90531b8 100644 --- a/tests/snapshots/porcelain/tool/organize/directory-structure-after-organize +++ b/tests/snapshots/porcelain/tool/organize/directory-structure-after-organize @@ -1,12 +1,12 @@ . 
-├── dir -├── example.com -│   ├── a-repo-with-extension -│   ├── one-origin -│   └── origin-and-fork -├── no-origin -│   └── a -└── special-origin - └── a - -7 directories, 2 files \ No newline at end of file +./dir +./example.com +./example.com/a-repo-with-extension +./example.com/one-origin +./example.com/origin-and-fork +./no-origin +./no-origin/.git +./no-origin/a +./special-origin +./special-origin/.git +./special-origin/a \ No newline at end of file diff --git a/tests/snapshots/porcelain/tool/organize/initial-directory-structure b/tests/snapshots/porcelain/tool/organize/initial-directory-structure index b66cef08be..15232897d1 100644 --- a/tests/snapshots/porcelain/tool/organize/initial-directory-structure +++ b/tests/snapshots/porcelain/tool/organize/initial-directory-structure @@ -1,13 +1,15 @@ . -├── a-non-bare-repo-with-extension.git -│   └── a -├── dir -│   └── one-origin -├── no-origin -│   └── a -├── origin-and-fork -│   └── a -└── special-origin - └── a - -6 directories, 4 files \ No newline at end of file +./a-non-bare-repo-with-extension.git +./a-non-bare-repo-with-extension.git/.git +./a-non-bare-repo-with-extension.git/a +./dir +./dir/one-origin +./no-origin +./no-origin/.git +./no-origin/a +./origin-and-fork +./origin-and-fork/.git +./origin-and-fork/a +./special-origin +./special-origin/.git +./special-origin/a \ No newline at end of file From f3193c9729c935be844df8faeb2e696844ba8d1f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 1 Jun 2023 19:05:28 +0200 Subject: [PATCH 03/16] Add note about corrected generation dates Right now we don't handle them at all, which might lead to issues in huge repos. --- crate-status.md | 1 + gix-commitgraph/src/file/commit.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/crate-status.md b/crate-status.md index 2a195297cf..f7ddf912ce 100644 --- a/crate-status.md +++ b/crate-status.md @@ -551,6 +551,7 @@ The git staging area. * [x] read-only access * [x] Graph lookup of commit information to obtain timestamps, generation and parents, and extra edges + * [ ] [Corrected generation dates](https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d) * [ ] Bloom filter index * [ ] Bloom filter data * [ ] create and update graphs and graph files diff --git a/gix-commitgraph/src/file/commit.rs b/gix-commitgraph/src/file/commit.rs index 7f4219b6e5..14fe15e841 100644 --- a/gix-commitgraph/src/file/commit.rs +++ b/gix-commitgraph/src/file/commit.rs @@ -51,6 +51,9 @@ impl<'a> Commit<'a> { root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]), parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])), parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])), + // TODO: Add support for corrected commit date offset overflow. + // See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and + // https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test. generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2, commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap()) & 0x0003_ffff_ffff, From e011e360fb2db0288f718cb3bb2b28b4652bc8ae Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 1 Jun 2023 10:34:25 +0200 Subject: [PATCH 04/16] feat!: respect `core.useReplaceRefs` and remove `gitoxide.objects.noReplace`. The gitoxide specific variable wasn't needed in the first place. 
--- gix/src/config/cache/init.rs | 8 ++++---- gix/src/config/tree/sections/core.rs | 4 ++++ gix/src/config/tree/sections/gitoxide.rs | 5 ++--- gix/src/open/repository.rs | 6 +++--- gix/tests/repository/open.rs | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/gix/src/config/cache/init.rs b/gix/src/config/cache/init.rs index 19eae0eea1..6fcbcc4ec8 100644 --- a/gix/src/config/cache/init.rs +++ b/gix/src/config/cache/init.rs @@ -426,10 +426,6 @@ fn apply_environment_overrides( Some(Cow::Borrowed("objects".into())), objects, &[ - { - let key = &gitoxide::Objects::NO_REPLACE; - (env(key), key.name) - }, { let key = &gitoxide::Objects::REPLACE_REF_BASE; (env(key), key.name) @@ -491,6 +487,10 @@ fn apply_environment_overrides( let key = &Core::SSH_COMMAND; (env(key), key.name, git_prefix) }, + { + let key = &Core::USE_REPLACE_REFS; + (env(key), key.name, objects) + }, ] { if let Some(value) = var_as_bstring(var, permission) { section.push_with_comment( diff --git a/gix/src/config/tree/sections/core.rs b/gix/src/config/tree/sections/core.rs index 63e5facb34..93d2fcd017 100644 --- a/gix/src/config/tree/sections/core.rs +++ b/gix/src/config/tree/sections/core.rs @@ -63,6 +63,9 @@ impl Core { /// The `core.sshCommand` key. pub const SSH_COMMAND: keys::Executable = keys::Executable::new_executable("sshCommand", &config::Tree::CORE) .with_environment_override("GIT_SSH_COMMAND"); + /// The `core.useReplaceRefs` key. + pub const USE_REPLACE_REFS: keys::Boolean = keys::Boolean::new_boolean("useReplaceRefs", &config::Tree::CORE) + .with_environment_override("GIT_NO_REPLACE_OBJECTS"); } impl Section for Core { @@ -92,6 +95,7 @@ impl Section for Core { &Self::EXCLUDES_FILE, &Self::ATTRIBUTES_FILE, &Self::SSH_COMMAND, + &Self::USE_REPLACE_REFS, ] } } diff --git a/gix/src/config/tree/sections/gitoxide.rs b/gix/src/config/tree/sections/gitoxide.rs index 3d63e94867..f07d494fb7 100644 --- a/gix/src/config/tree/sections/gitoxide.rs +++ b/gix/src/config/tree/sections/gitoxide.rs @@ -317,8 +317,7 @@ mod subsections { .with_note("If unset or 0, there is no object cache") .with_environment_override("GITOXIDE_OBJECT_CACHE_MEMORY"); /// The `gitoxide.objects.noReplace` key. - pub const NO_REPLACE: keys::Boolean = keys::Boolean::new_boolean("noReplace", &Gitoxide::OBJECTS) - .with_environment_override("GIT_NO_REPLACE_OBJECTS"); + pub const NO_REPLACE: keys::Boolean = keys::Boolean::new_boolean("noReplace", &Gitoxide::OBJECTS); /// The `gitoxide.objects.replaceRefBase` key. 
pub const REPLACE_REF_BASE: keys::Any = keys::Any::new("replaceRefBase", &Gitoxide::OBJECTS).with_environment_override("GIT_REPLACE_REF_BASE"); @@ -330,7 +329,7 @@ mod subsections { } fn keys(&self) -> &[&dyn Key] { - &[&Self::CACHE_LIMIT, &Self::NO_REPLACE, &Self::REPLACE_REF_BASE] + &[&Self::CACHE_LIMIT, &Self::REPLACE_REF_BASE] } fn parent(&self) -> Option<&dyn Section> { diff --git a/gix/src/open/repository.rs b/gix/src/open/repository.rs index c7702b5f66..e89fdc4306 100644 --- a/gix/src/open/repository.rs +++ b/gix/src/open/repository.rs @@ -284,12 +284,12 @@ fn replacement_objects_refs_prefix( mut filter_config_section: fn(&gix_config::file::Metadata) -> bool, ) -> Result, Error> { let is_disabled = config - .boolean_filter_by_key("gitoxide.objects.noReplace", &mut filter_config_section) - .map(|b| gitoxide::Objects::NO_REPLACE.enrich_error(b)) + .boolean_filter_by_key("core.useReplaceRefs", &mut filter_config_section) + .map(|b| Core::USE_REPLACE_REFS.enrich_error(b)) .transpose() .with_leniency(lenient) .map_err(config::Error::ConfigBoolean)? - .unwrap_or_default(); + .unwrap_or(true); if is_disabled { return Ok(None); diff --git a/gix/tests/repository/open.rs b/gix/tests/repository/open.rs index 480038c540..d330f9490b 100644 --- a/gix/tests/repository/open.rs +++ b/gix/tests/repository/open.rs @@ -394,7 +394,7 @@ mod with_overrides { for (key, expected) in [ ("gitoxide.http.verbose", "true"), ("gitoxide.allow.protocolFromUser", "file-allowed"), - ("gitoxide.objects.noReplace", "no-replace"), + ("core.useReplaceRefs", "no-replace"), ("gitoxide.objects.replaceRefBase", "refs/replace-mine"), ("gitoxide.committer.nameFallback", "committer name"), ("gitoxide.committer.emailFallback", "committer email"), From d9258969047a53bcb437b4ebd9467e6c95d715d7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Jun 2023 21:42:20 +0200 Subject: [PATCH 05/16] Keep track of core.alternateRefsCommand --- src/plumbing/progress.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 71ee94028d..50f917c17d 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -116,6 +116,10 @@ static GIT_CONFIG: &[Record] = &[ config: "core.packedGitLimit", usage: NotApplicable { reason: "we target 32bit systems only and don't use a windowing mechanism" } }, + Record { + config: "core.alternateRefsCommand", + usage: NotPlanned { reason: "there is no need as we can perform the required operation in-binary. This could happen though if there is a use-case and demand." } + }, Record { config: "core.checkRoundtripEncoding", usage: Planned { note: Some("needed once working-tree-encoding attributes are supported") } From 11ad8a890a6233befb5d2b6b41caadbcb296c3f5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 1 Jun 2023 09:08:36 +0200 Subject: [PATCH 06/16] feat!: Add version of Graph that handles fully-parsed commits This renames `graph::Commit` to `graph::LazyCommit` to make space for `graph::Commit` to be a fully owned. `LazyCommit::to_owned()` was added to obtain fully owned `Commit` instances. Rename `Graph::try_lookup_and_insert()` to `Graph::try_lookup_or_insert()` and `Graph::try_lookup_and_insert_default()` to `Graph::try_lookup_or_insert_default()` Additionally, add the `peek()` and `iter_unordered()` method to the `PriorityQueue`, along with an implementation for `Clone` Rename `PriorityQueue::iter_random()` to `::iter_unordered()`. 
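As a consumer-side sketch of the renamed queue API (the `u64` keys and string values are made up for illustration; in `describe()` the keys are committer timestamps and the values commit ids):

    use gix_revision::PriorityQueue;

    fn main() {
        // Build a queue from (key, value) pairs via the `FromIterator` implementation.
        let queue: PriorityQueue<u64, &str> = vec![(1, "oldest"), (3, "newest"), (2, "middle")]
            .into_iter()
            .collect();

        // `peek()` returns the greatest item as ordered by the key, without removing it.
        assert_eq!(queue.peek(), Some((&3, &"newest")));

        // `iter_unordered()` (formerly `iter_random()`) visits values in arbitrary order.
        assert_eq!(queue.iter_unordered().count(), 3);
    }

The new `Clone` implementation and `into_iter_unordered()` follow the same pattern, which helps when a traversal needs to keep or hand off a copy of its queue state.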
--- gix-revision/src/describe.rs | 2 +- gix-revision/src/graph/commit.rs | 148 ++++++++ gix-revision/src/graph/errors.rs | 54 +++ gix-revision/src/{graph.rs => graph/mod.rs} | 376 ++++++++++---------- gix-revision/src/lib.rs | 23 +- gix-revision/src/queue.rs | 35 +- gix-revision/src/spec/mod.rs | 51 +++ gix-revision/src/types.rs | 48 --- gix-revision/tests/graph/mod.rs | 8 + gix-revision/tests/revision.rs | 1 + 10 files changed, 495 insertions(+), 251 deletions(-) create mode 100644 gix-revision/src/graph/commit.rs create mode 100644 gix-revision/src/graph/errors.rs rename gix-revision/src/{graph.rs => graph/mod.rs} (58%) delete mode 100644 gix-revision/src/types.rs create mode 100644 gix-revision/tests/graph/mod.rs diff --git a/gix-revision/src/describe.rs b/gix-revision/src/describe.rs index b669577fdf..fa2ee20e01 100644 --- a/gix-revision/src/describe.rs +++ b/gix-revision/src/describe.rs @@ -327,7 +327,7 @@ pub(crate) mod function { let flags = graph[&commit]; if (flags & best_candidate.identity_bit) == best_candidate.identity_bit { if queue - .iter_random() + .iter_unordered() .all(|id| (graph[id] & best_candidate.identity_bit) == best_candidate.identity_bit) { break; diff --git a/gix-revision/src/graph/commit.rs b/gix-revision/src/graph/commit.rs new file mode 100644 index 0000000000..854d807374 --- /dev/null +++ b/gix-revision/src/graph/commit.rs @@ -0,0 +1,148 @@ +use super::LazyCommit; +use crate::graph::{Commit, CommitterTimestamp, Either, Generation}; +use smallvec::SmallVec; + +impl<'graph> LazyCommit<'graph> { + /// Return an iterator over the parents of this commit. + pub fn iter_parents(&self) -> Parents<'graph> { + let backing = match &self.backing { + Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), + Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), + }; + Parents { backing } + } + + /// Returns the timestamp at which this commit was created. + /// + /// This is the single-most important date for determining recency of commits. + /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. + pub fn committer_timestamp(&self) -> Result { + Ok(match &self.backing { + Either::Left(buf) => { + gix_object::CommitRefIter::from_bytes(buf) + .committer()? + .time + .seconds_since_unix_epoch as CommitterTimestamp + } + Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp(), + }) + } + + /// Returns the generation of the commit if it is backed by a commit graph. + pub fn generation(&self) -> Option { + match &self.backing { + Either::Left(_) => None, + Either::Right((cache, pos)) => cache.commit_at(*pos).generation().into(), + } + } + + /// Convert ourselves into an owned version, which effectively detaches us from the underlying graph. + /// Use `new_data()` to provide the `data` field for the owned `Commit`. + pub fn to_owned(&self, new_data: impl FnOnce() -> T) -> Result, to_owned::Error> { + let data = new_data(); + Ok(match &self.backing { + Either::Left(buf) => { + use gix_object::commit::ref_iter::Token; + let iter = gix_object::CommitRefIter::from_bytes(buf); + let mut parents = SmallVec::default(); + let mut timestamp = None; + for token in iter { + match token? { + Token::Tree { .. } => {} + Token::Parent { id } => parents.push(id), + Token::Author { .. 
} => {} + Token::Committer { signature } => { + timestamp = Some(signature.time.seconds_since_unix_epoch as CommitterTimestamp); + break; + } + _ => { + unreachable!( + "we break naturally after seeing the committer which is always at the same spot" + ) + } + } + } + Commit { + parents, + commit_time: timestamp.unwrap_or_default(), + generation: None, + data, + } + } + Either::Right((cache, pos)) => { + let mut parents = SmallVec::default(); + let commit = cache.commit_at(*pos); + for pos in commit.iter_parents() { + let pos = pos?; + parents.push(cache.commit_at(pos).id().to_owned()); + } + Commit { + parents, + commit_time: commit.committer_timestamp(), + generation: Some(commit.generation()), + data, + } + } + }) + } +} + +/// An iterator over the parents of a commit. +pub struct Parents<'graph> { + backing: Either< + gix_object::CommitRefIter<'graph>, + ( + &'graph gix_commitgraph::Graph, + gix_commitgraph::file::commit::Parents<'graph>, + ), + >, +} + +impl<'graph> Iterator for Parents<'graph> { + type Item = Result; + + fn next(&mut self) -> Option { + match &mut self.backing { + Either::Left(it) => { + for token in it { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => return Some(Ok(id)), + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + None + } + Either::Right((cache, it)) => it + .next() + .map(|r| r.map(|pos| cache.id_at(pos).to_owned()).map_err(Into::into)), + } + } +} + +/// +pub mod iter_parents { + /// The error returned by the [`Parents`][super::Parents] iterator. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred when parsing commit parents")] + DecodeCommit(#[from] gix_object::decode::Error), + #[error("An error occurred when parsing parents from the commit graph")] + DecodeCommitGraph(#[from] gix_commitgraph::file::commit::Error), + } +} + +/// +pub mod to_owned { + /// The error returned by [`to_owned()`][crate::graph::LazyCommit::to_owned()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("A commit could not be decoded during traversal")] + Decode(#[from] gix_object::decode::Error), + #[error("Could not find commit position in graph when traversing parents")] + CommitGraphParent(#[from] gix_commitgraph::file::commit::Error), + } +} diff --git a/gix-revision/src/graph/errors.rs b/gix-revision/src/graph/errors.rs new file mode 100644 index 0000000000..cd3e926118 --- /dev/null +++ b/gix-revision/src/graph/errors.rs @@ -0,0 +1,54 @@ +/// +pub mod lookup { + /// The error returned by [`try_lookup()`][crate::Graph::try_lookup()]. + #[derive(Debug, thiserror::Error)] + #[error("There was an error looking up a commit")] + pub struct Error { + #[from] + source: Box, + } + + /// + pub mod commit { + /// The error returned by [`try_lookup_commit()`][crate::Graph::try_lookup_commit()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("There was an error looking up a commit")] + Find(#[from] crate::graph::lookup::Error), + #[error(transparent)] + ToOwned(#[from] crate::graph::commit::to_owned::Error), + } + } + + /// + pub mod existing { + /// The error returned by [`lookup()`][crate::Graph::lookup()]. 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Find(#[from] super::Error), + #[error("Commit could not be found")] + Missing, + } + } +} + +/// +pub mod insert_parents { + use crate::graph::commit::iter_parents; + use crate::graph::lookup; + + /// The error returned by [`insert_parents()`][crate::Graph::insert_parents()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Lookup(#[from] lookup::existing::Error), + #[error("A commit could not be decoded during traversal")] + Decode(#[from] gix_object::decode::Error), + #[error(transparent)] + Parent(#[from] iter_parents::Error), + } +} diff --git a/gix-revision/src/graph.rs b/gix-revision/src/graph/mod.rs similarity index 58% rename from gix-revision/src/graph.rs rename to gix-revision/src/graph/mod.rs index 88f33db25e..ea29defab7 100644 --- a/gix-revision/src/graph.rs +++ b/gix-revision/src/graph/mod.rs @@ -1,11 +1,24 @@ use crate::Graph; use gix_hash::oid; use smallvec::SmallVec; +use std::fmt::Formatter; use std::ops::Index; +/// +pub mod commit; + +mod errors; +pub use errors::{insert_parents, lookup}; + /// The time in seconds since unix epoch at which a commit was created. pub type CommitterTimestamp = u64; +/// The generation away from the HEAD of graph, useful to limit algorithms by topological depth as well. +/// +/// 0 would mean the starting point of the hierarchy, and 1 their parents. +/// This number is only available natively if there is a commit-graph. +pub type Generation = u32; + impl<'find, T: std::fmt::Debug> std::fmt::Debug for Graph<'find, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Debug::fmt(&self.set, f) @@ -17,68 +30,17 @@ impl<'find, T: Default> Graph<'find, T> { /// If it wasn't, associate it with the default value. Assure `update_data(data)` gets run. /// Return the commit when done. /// Note that none of the data updates happen if there was no commit named `id`. - pub fn try_lookup_and_insert( + pub fn try_lookup_or_insert( &mut self, id: gix_hash::ObjectId, update_data: impl FnOnce(&mut T), - ) -> Result>, lookup::Error> { - self.try_lookup_and_insert_default(id, || T::default(), update_data) + ) -> Result>, lookup::Error> { + self.try_lookup_or_insert_default(id, T::default, update_data) } } +/// Access and mutation impl<'find, T> Graph<'find, T> { - /// Create a new instance with `find` to retrieve commits and optionally `cache` to accelerate commit access. - /// - /// ### Performance - /// - /// `find` should be optimized to access the same object repeatedly, ideally with an object cache to keep the last couple of - /// most recently used commits. - /// Furthermore, **none-existing commits should not trigger the pack-db to be refreshed.** Otherwise, performance may be sub-optimal - /// in shallow repositories as running into non-existing commits will trigger a refresh of the `packs` directory. - pub fn new(mut find: Find, cache: impl Into>) -> Self - where - Find: - for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result>, E> + 'find, - E: std::error::Error + Send + Sync + 'static, - { - Graph { - find: Box::new(move |id, buf| { - find(id, buf).map_err(|err| Box::new(err) as Box) - }), - cache: cache.into(), - set: gix_hashtable::HashMap::default(), - buf: Vec::new(), - parent_buf: Vec::new(), - } - } - - /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set - /// with a `default` value assigned to it. 
- /// If it wasn't, associate it with the default value. Assure `update_data(data)` gets run. - /// Return the commit when done. - /// Note that none of the data updates happen if there was no commit named `id`. - pub fn try_lookup_and_insert_default( - &mut self, - id: gix_hash::ObjectId, - default: impl FnOnce() -> T, - update_data: impl FnOnce(&mut T), - ) -> Result>, lookup::Error> { - let res = try_lookup(&id, &mut self.find, self.cache.as_ref(), &mut self.buf)?; - Ok(res.map(|commit| { - match self.set.entry(id) { - gix_hashtable::hash_map::Entry::Vacant(entry) => { - let mut data = default(); - update_data(&mut data); - entry.insert(data); - } - gix_hashtable::hash_map::Entry::Occupied(mut entry) => { - update_data(entry.get_mut()); - } - }; - commit - })) - } - /// Returns true if `id` has data associated with it, meaning that we processed it already. pub fn contains(&self, id: &gix_hash::oid) -> bool { self.set.contains_key(id.as_ref()) @@ -104,18 +66,6 @@ impl<'find, T> Graph<'find, T> { self.set.clear(); } - /// Try to lookup `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist. - /// - /// It's possible that commits don't exist if the repository is shallow. - pub fn try_lookup(&mut self, id: &gix_hash::oid) -> Result>, lookup::Error> { - try_lookup(id, &mut self.find, self.cache.as_ref(), &mut self.buf) - } - - /// Lookup `id` and return a handle to it, or fail if it doesn't exist. - pub fn lookup(&mut self, id: &gix_hash::oid) -> Result, lookup::existing::Error> { - self.try_lookup(id)?.ok_or(lookup::existing::Error::Missing) - } - /// Insert the parents of commit named `id` to the graph and associate new parents with data /// by calling `new_parent_data(parent_id, committer_timestamp)`, or update existing parents /// data with `update_existing(parent_id, &mut existing_data)`. @@ -128,10 +78,7 @@ impl<'find, T> Graph<'find, T> { first_parent: bool, ) -> Result<(), insert_parents::Error> { let commit = self.lookup(id)?; - let parents: SmallVec<[_; 2]> = commit - .iter_parents() - .take(if first_parent { 1 } else { usize::MAX }) - .collect(); + let parents: SmallVec<[_; 2]> = commit.iter_parents().collect(); for parent_id in parents { let parent_id = parent_id?; match self.set.entry(parent_id) { @@ -158,22 +105,141 @@ impl<'find, T> Graph<'find, T> { } } +/// Initialization +impl<'find, T> Graph<'find, T> { + /// Create a new instance with `find` to retrieve commits and optionally `cache` to accelerate commit access. + /// + /// ### Performance + /// + /// `find` should be optimized to access the same object repeatedly, ideally with an object cache to keep the last couple of + /// most recently used commits. + /// Furthermore, **none-existing commits should not trigger the pack-db to be refreshed.** Otherwise, performance may be sub-optimal + /// in shallow repositories as running into non-existing commits will trigger a refresh of the `packs` directory. 
+ pub fn new(mut find: Find, cache: impl Into>) -> Self + where + Find: + for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result>, E> + 'find, + E: std::error::Error + Send + Sync + 'static, + { + Graph { + find: Box::new(move |id, buf| { + find(id, buf).map_err(|err| Box::new(err) as Box) + }), + cache: cache.into(), + set: gix_hashtable::HashMap::default(), + buf: Vec::new(), + parent_buf: Vec::new(), + } + } +} + +/// commit access +impl<'find, T> Graph<'find, Commit> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a commit with `new_data()` assigned. + /// `update_data(data)` gets run either on existing or on new data. + /// + /// Note that none of the data updates happen if `id` didn't exist. + pub fn try_lookup_or_insert_commit_default( + &mut self, + id: gix_hash::ObjectId, + new_data: impl FnOnce() -> T, + update_data: impl FnOnce(&mut T), + ) -> Result>, lookup::commit::Error> { + match self.set.entry(id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let res = try_lookup(&id, &mut self.find, self.cache.as_ref(), &mut self.buf)?; + let commit = match res { + None => return Ok(None), + Some(commit) => commit, + }; + let mut commit = commit.to_owned(new_data)?; + update_data(&mut commit.data); + entry.insert(commit); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_data(&mut entry.get_mut().data); + } + }; + Ok(self.set.get_mut(&id)) + } +} + +/// commit access +impl<'find, T: Default> Graph<'find, Commit> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a commit and default data assigned. + /// `update_data(data)` gets run either on existing or on new data. + /// + /// Note that none of the data updates happen if `id` didn't exist. + pub fn try_lookup_or_insert_commit( + &mut self, + id: gix_hash::ObjectId, + update_data: impl FnOnce(&mut T), + ) -> Result>, lookup::commit::Error> { + self.try_lookup_or_insert_commit_default(id, T::default, update_data) + } +} + +/// Lazy commit access +impl<'find, T> Graph<'find, T> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a `default` value assigned to it. + /// `update_data(data)` gets run either on existing or no new data. + /// Return the commit when done. + /// + /// Note that none of the data updates happen if `id` didn't exist. + pub fn try_lookup_or_insert_default( + &mut self, + id: gix_hash::ObjectId, + default: impl FnOnce() -> T, + update_data: impl FnOnce(&mut T), + ) -> Result>, lookup::Error> { + let res = try_lookup(&id, &mut self.find, self.cache.as_ref(), &mut self.buf)?; + Ok(res.map(|commit| { + match self.set.entry(id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let mut data = default(); + update_data(&mut data); + entry.insert(data); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_data(entry.get_mut()); + } + }; + commit + })) + } + + /// Try to lookup `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist. + /// + /// It's possible that commits don't exist if the repository is shallow. + pub fn try_lookup(&mut self, id: &gix_hash::oid) -> Result>, lookup::Error> { + try_lookup(id, &mut self.find, self.cache.as_ref(), &mut self.buf) + } + + /// Lookup `id` and return a handle to it, or fail if it doesn't exist. 
+ pub fn lookup(&mut self, id: &gix_hash::oid) -> Result, lookup::existing::Error> { + self.try_lookup(id)?.ok_or(lookup::existing::Error::Missing) + } +} + fn try_lookup<'graph>( id: &gix_hash::oid, find: &mut Box>, cache: Option<&'graph gix_commitgraph::Graph>, buf: &'graph mut Vec, -) -> Result>, lookup::Error> { +) -> Result>, lookup::Error> { if let Some(cache) = cache { if let Some(pos) = cache.lookup(id) { - return Ok(Some(Commit { + return Ok(Some(LazyCommit { backing: Either::Right((cache, pos)), })); } } #[allow(clippy::manual_map)] Ok(match find(id, buf)? { - Some(_) => Some(Commit { + Some(_) => Some(LazyCommit { backing: Either::Left(buf), }), None => None, @@ -188,136 +254,49 @@ impl<'a, 'find, T> Index<&'a gix_hash::oid> for Graph<'find, T> { } } -/// -pub mod lookup { - /// The error returned by [`try_lookup()`][crate::Graph::try_lookup()]. - #[derive(Debug, thiserror::Error)] - #[error("There was an error looking up a commit")] - pub struct Error { - #[from] - source: Box, - } - - /// - pub mod existing { - /// The error returned by [`lookup()`][crate::Graph::lookup()]. - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error(transparent)] - Find(#[from] super::Error), - #[error("Commit could not be found")] - Missing, - } - } +/// A commit that contains all information we can obtain through the commit-graph, which is typically enough to fuel any graph iteration. +pub struct Commit { + /// The parents of the commit. + pub parents: SmallVec<[gix_hash::ObjectId; 1]>, + /// The time at which the commit was created. + pub commit_time: CommitterTimestamp, + /// The generation of the commit, if available. + pub generation: Option, + /// Any kind of data to associate with this commit. + pub data: T, } -/// -pub mod insert_parents { - use crate::graph::commit::iter_parents; - use crate::graph::lookup; - - /// The error returned by [`insert_parents()`][crate::Graph::insert_parents()]. - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error(transparent)] - Lookup(#[from] lookup::existing::Error), - #[error("A commit could not be decoded during traversal")] - Decode(#[from] gix_object::decode::Error), - #[error(transparent)] - Parent(#[from] iter_parents::Error), +impl std::fmt::Debug for Commit +where + T: std::fmt::Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Commit") + .field("parents", &self.parents) + .field("commit_time", &self.commit_time) + .field("generation", &self.generation) + .field("data", &self.data) + .finish() } } -enum Either { - Left(T), - Right(U), -} - -/// A commit that provides access to graph-related information. -pub struct Commit<'graph> { - backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>, -} - -/// -pub mod commit { - use super::Commit; - use crate::graph::{CommitterTimestamp, Either}; - - impl<'graph> Commit<'graph> { - /// Return an iterator over the parents of this commit. - pub fn iter_parents(&self) -> Parents<'graph> { - let backing = match &self.backing { - Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), - Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), - }; - Parents { backing } - } - - /// Returns the timestamp at which this commit was created. - /// - /// This is the single-most important date for determining recency of commits. 
- /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. - pub fn committer_timestamp(&self) -> Result { - Ok(match &self.backing { - Either::Left(buf) => { - gix_object::CommitRefIter::from_bytes(buf) - .committer()? - .time - .seconds_since_unix_epoch as CommitterTimestamp - } - Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp(), - }) +impl Clone for Commit +where + T: Clone, +{ + fn clone(&self) -> Self { + Commit { + parents: self.parents.clone(), + commit_time: self.commit_time, + generation: self.generation, + data: self.data.clone(), } } +} - /// An iterator over the parents of a commit. - pub struct Parents<'graph> { - backing: Either< - gix_object::CommitRefIter<'graph>, - ( - &'graph gix_commitgraph::Graph, - gix_commitgraph::file::commit::Parents<'graph>, - ), - >, - } - - impl<'graph> Iterator for Parents<'graph> { - type Item = Result; - - fn next(&mut self) -> Option { - match &mut self.backing { - Either::Left(it) => { - for token in it { - match token { - Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, - Ok(gix_object::commit::ref_iter::Token::Parent { id }) => return Some(Ok(id)), - Ok(_unused_token) => break, - Err(err) => return Some(Err(err.into())), - } - } - None - } - Either::Right((cache, it)) => it - .next() - .map(|r| r.map(|pos| cache.id_at(pos).to_owned()).map_err(Into::into)), - } - } - } - - /// - pub mod iter_parents { - /// The error returned by the [`Parents`][super::Parents] iterator. - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error("An error occurred when parsing commit parents")] - DecodeCommit(#[from] gix_object::decode::Error), - #[error("An error occurred when parsing parents from the commit graph")] - DecodeCommitGraph(#[from] gix_commitgraph::file::commit::Error), - } - } +/// A commit that provides access to graph-related information, on demand. +pub struct LazyCommit<'graph> { + backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>, } pub(crate) type FindFn<'find> = dyn for<'a> FnMut( @@ -326,3 +305,8 @@ pub(crate) type FindFn<'find> = dyn for<'a> FnMut( ) -> Result>, Box> + 'find; + +enum Either { + Left(T), + Right(U), +} diff --git a/gix-revision/src/lib.rs b/gix-revision/src/lib.rs index 20eea37146..463580e63a 100644 --- a/gix-revision/src/lib.rs +++ b/gix-revision/src/lib.rs @@ -14,18 +14,31 @@ pub use describe::function::describe; /// pub mod spec; - -mod types; -use crate::graph::FindFn; -pub use types::Spec; +pub use spec::types::Spec; /// A graph of commits which additionally allows to associate data with commits. /// /// It starts empty, but each access may fill it with commit information. /// Note that the traversal can be accelerated if a [commit-graph][gix_commitgraph::Graph] is also made available. +/// +/// ### About replacements +/// +/// Object replacements is an object database feature to substitute one object with another. We assume that this is transparently +/// implemented by the `find` function that returns objects. Also we assume that the commitgraph as been written with replacements +/// active to provide a consistent view. +/// +/// ### Odb or `find` configuration +/// +/// The `find` handle should be setup to *quickly determine if an object exists or not* to assure quick operation *on shallow repositories*. +/// This typically means that it should not re-read the odb if there is an object miss. 
+/// +/// Most usage of the Graph will benefit from fast ODB lookups, so setting up an object cache will be beneficial. If that's not the case, +/// the method docs will inform about that. +/// +/// Additionally, there is *no need for an object cache* as we keep track of everything we traverse in our own hashmap. pub struct Graph<'find, T> { /// A way to resolve a commit from the object database. - find: Box>, + find: Box>, /// A way to speedup commit access, essentially a multi-file commit database. cache: Option, /// The set of cached commits that we have seen once, along with data associated with them. diff --git a/gix-revision/src/queue.rs b/gix-revision/src/queue.rs index 5d24e1638b..32894d45c3 100644 --- a/gix-revision/src/queue.rs +++ b/gix-revision/src/queue.rs @@ -39,6 +39,29 @@ impl Ord for Item { } } +impl Clone for Item +where + K: Clone, + T: Clone, +{ + fn clone(&self) -> Self { + Item { + key: self.key.clone(), + value: self.value.clone(), + } + } +} + +impl Clone for PriorityQueue +where + K: Clone + Ord, + T: Clone, +{ + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + impl PriorityQueue { /// Create a new instance. pub fn new() -> Self { @@ -55,14 +78,24 @@ impl PriorityQueue { } /// Iterate all items ordered from highest to lowest priority. - pub fn iter_random(&self) -> impl Iterator { + pub fn iter_unordered(&self) -> impl Iterator { self.0.iter().map(|t| &t.value) } + /// Turn this instance into an iterator over its keys and values in arbitrary order. + pub fn into_iter_unordered(self) -> impl Iterator { + self.0.into_vec().into_iter().map(|item| (item.key, item.value)) + } + /// Return true if the queue is empty. pub fn is_empty(&self) -> bool { self.0.is_empty() } + + /// Returns the greatest item `(K, T)` tuple, as ordered by `K` if the queue is not empty. + pub fn peek(&self) -> Option<(&K, &T)> { + self.0.peek().map(|e| (&e.key, &e.value)) + } } impl FromIterator<(K, T)> for PriorityQueue { diff --git a/gix-revision/src/spec/mod.rs b/gix-revision/src/spec/mod.rs index c2df7bc1e6..616ad3a26b 100644 --- a/gix-revision/src/spec/mod.rs +++ b/gix-revision/src/spec/mod.rs @@ -53,6 +53,57 @@ mod _impls { } } +pub(crate) mod types { + /// A revision specification without any bindings to a repository, useful for serialization or movement over thread boundaries. + /// + /// Note that all [object ids][gix_hash::ObjectId] should be a committish, but don't have to be. + /// Unless the field name contains `_exclusive`, the respective objects are included in the set. + #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub enum Spec { + /// Include commits reachable from this revision, i.e. `a` and its ancestors. + /// + /// The equivalent to [crate::spec::Kind::IncludeReachable], but with data. + Include(gix_hash::ObjectId), + /// Exclude commits reachable from this revision, i.e. `a` and its ancestors. Example: `^a`. + /// + /// The equivalent to [crate::spec::Kind::ExcludeReachable], but with data. + Exclude(gix_hash::ObjectId), + /// Every commit that is reachable from `from` to `to`, but not any ancestors of `from`. Example: `from..to`. + /// + /// The equivalent to [crate::spec::Kind::RangeBetween], but with data. + Range { + /// The starting point of the range, which is included in the set. + from: gix_hash::ObjectId, + /// The end point of the range, which is included in the set. 
+ to: gix_hash::ObjectId, + }, + /// Every commit reachable through either `theirs` or `ours`, but no commit that is reachable by both. Example: `theirs...ours`. + /// + /// The equivalent to [crate::spec::Kind::ReachableToMergeBase], but with data. + Merge { + /// Their side of the merge, which is included in the set. + theirs: gix_hash::ObjectId, + /// Our side of the merge, which is included in the set. + ours: gix_hash::ObjectId, + }, + /// Include every commit of all parents of `a`, but not `a` itself. Example: `a^@`. + /// + /// The equivalent to [crate::spec::Kind::IncludeReachableFromParents], but with data. + IncludeOnlyParents( + /// Include only the parents of this object, but not the object itself. + gix_hash::ObjectId, + ), + /// Exclude every commit of all parents of `a`, but not `a` itself. Example: `a^!`. + /// + /// The equivalent to [crate::spec::Kind::ExcludeReachableFromParents], but with data. + ExcludeParents( + /// Exclude the parents of this object, but not the object itself. + gix_hash::ObjectId, + ), + } +} + /// pub mod parse; pub use parse::function::parse; diff --git a/gix-revision/src/types.rs b/gix-revision/src/types.rs deleted file mode 100644 index 374d43e200..0000000000 --- a/gix-revision/src/types.rs +++ /dev/null @@ -1,48 +0,0 @@ -/// A revision specification without any bindings to a repository, useful for serialization or movement over thread boundaries. -/// -/// Note that all [object ids][gix_hash::ObjectId] should be a committish, but don't have to be. -/// Unless the field name contains `_exclusive`, the respective objects are included in the set. -#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum Spec { - /// Include commits reachable from this revision, i.e. `a` and its ancestors. - /// - /// The equivalent to [crate::spec::Kind::IncludeReachable], but with data. - Include(gix_hash::ObjectId), - /// Exclude commits reachable from this revision, i.e. `a` and its ancestors. Example: `^a`. - /// - /// The equivalent to [crate::spec::Kind::ExcludeReachable], but with data. - Exclude(gix_hash::ObjectId), - /// Every commit that is reachable from `from` to `to`, but not any ancestors of `from`. Example: `from..to`. - /// - /// The equivalent to [crate::spec::Kind::RangeBetween], but with data. - Range { - /// The starting point of the range, which is included in the set. - from: gix_hash::ObjectId, - /// The end point of the range, which is included in the set. - to: gix_hash::ObjectId, - }, - /// Every commit reachable through either `theirs` or `ours`, but no commit that is reachable by both. Example: `theirs...ours`. - /// - /// The equivalent to [crate::spec::Kind::ReachableToMergeBase], but with data. - Merge { - /// Their side of the merge, which is included in the set. - theirs: gix_hash::ObjectId, - /// Our side of the merge, which is included in the set. - ours: gix_hash::ObjectId, - }, - /// Include every commit of all parents of `a`, but not `a` itself. Example: `a^@`. - /// - /// The equivalent to [crate::spec::Kind::IncludeReachableFromParents], but with data. - IncludeOnlyParents( - /// Include only the parents of this object, but not the object itself. - gix_hash::ObjectId, - ), - /// Exclude every commit of all parents of `a`, but not `a` itself. Example: `a^!`. - /// - /// The equivalent to [crate::spec::Kind::ExcludeReachableFromParents], but with data. 
- ExcludeParents(
- /// Exclude the parents of this object, but not the object itself.
- gix_hash::ObjectId,
- ),
-}
diff --git a/gix-revision/tests/graph/mod.rs b/gix-revision/tests/graph/mod.rs
new file mode 100644
index 0000000000..3b03dee2f6
--- /dev/null
+++ b/gix-revision/tests/graph/mod.rs
@@ -0,0 +1,8 @@
+#[test]
+fn size_of_commit() {
+ assert_eq!(
+ std::mem::size_of::>(),
+ 48,
+ "We might see quite a lot of these, so they shouldn't grow unexpectedly"
+ )
+}
diff --git a/gix-revision/tests/revision.rs b/gix-revision/tests/revision.rs
index 71d63cb405..1b469d3e90 100644
--- a/gix-revision/tests/revision.rs
+++ b/gix-revision/tests/revision.rs
@@ -1,4 +1,5 @@
 mod describe;
+mod graph;
 mod spec;

 pub type Result = std::result::Result>;

From 1bd93bedd2f184510239c50c345d3dbc41d7d13b Mon Sep 17 00:00:00 2001
From: Sebastian Thiel 
Date: Wed, 31 May 2023 20:34:16 +0200
Subject: [PATCH 07/16] feat: allow graph sharing by unifying `Flags` type.

This makes the graph used in `gix-negotiate` shareable by callers, which can
do their own traversal and store their own flags. The knowledge of this
traversal can be kept using such shared flags, like the `PARSED` bit which
should be set whenever parents are traversed.

That way we are able to emulate the algorithms git uses perfectly, as we keep
exactly the same state.
---
 gix-negotiate/src/consecutive.rs | 156 +++++++++++---------
 gix-negotiate/src/lib.rs | 106 ++++++++++++------
 gix-negotiate/src/noop.rs | 8 +-
 gix-negotiate/src/skipping.rs | 166 +++++++++++-----------
 gix-negotiate/tests/baseline/mod.rs | 30 +++--
 gix-negotiate/tests/negotiate.rs | 9 ++
 gix-revision/src/graph/errors.rs | 2 +-
 gix-revision/src/lib.rs | 3 +-
 8 files changed, 238 insertions(+), 242 deletions(-)

diff --git a/gix-negotiate/src/consecutive.rs b/gix-negotiate/src/consecutive.rs
index a813bc207e..87231a7ca3 100644
--- a/gix-negotiate/src/consecutive.rs
+++ b/gix-negotiate/src/consecutive.rs
@@ -1,49 +1,35 @@
-use crate::{Error, Negotiator};
+use crate::{Error, Flags, Negotiator};
 use gix_hash::ObjectId;
 use gix_revision::graph::CommitterTimestamp;
-use smallvec::SmallVec;
-bitflags::bitflags! {
- /// Whether something can be read or written.
- #[derive(Debug, Default, Copy, Clone)]
- pub struct Flags: u8 {
- /// The revision is known to be in common with the remote.
- const COMMON = 1 << 0;
- /// The revision is common and was set by merit of a remote tracking ref (e.g. `refs/heads/origin/main`).
- const COMMON_REF = 1 << 1;
- /// The revision has entered the priority queue.
- const SEEN = 1 << 2;
- /// The revision was popped off our primary priority queue, used to avoid double-counting of `non_common_revs`
- const POPPED = 1 << 3;
- }
-}
-pub(crate) struct Algorithm<'find> {
- graph: gix_revision::Graph<'find, Flags>,
+pub(crate) struct Algorithm {
 revs: gix_revision::PriorityQueue,
 non_common_revs: usize,
 }
-impl<'a> Algorithm<'a> {
- pub fn new(graph: gix_revision::Graph<'a, Flags>) -> Self {
+impl Default for Algorithm {
+ fn default() -> Self {
 Self {
- graph,
 revs: gix_revision::PriorityQueue::new(),
 non_common_revs: 0,
 }
 }
+}
+impl Algorithm {
 /// Add `id` to our priority queue and *add* `flags` to it.
- fn add_to_queue(&mut self, id: ObjectId, mark: Flags) -> Result<(), Error> { + fn add_to_queue(&mut self, id: ObjectId, mark: Flags, graph: &mut crate::Graph<'_>) -> Result<(), Error> { let mut is_common = false; - if self.graph.get(&id).map_or(false, |flags| flags.intersects(mark)) { - return Ok(()); - } - let commit = self.graph.try_lookup_and_insert(id, |current| { - *current |= mark; - is_common = current.contains(Flags::COMMON); - })?; - if let Some(timestamp) = commit.map(|c| c.committer_timestamp()).transpose()? { - self.revs.insert(timestamp, id); + let mut has_mark = false; + if let Some(commit) = graph + .try_lookup_or_insert_commit(id, |data| { + has_mark = data.flags.intersects(mark); + data.flags |= mark; + is_common = data.flags.contains(Flags::COMMON); + })? + .filter(|_| !has_mark) + { + self.revs.insert(commit.commit_time, id); if !is_common { self.non_common_revs += 1; } @@ -51,43 +37,46 @@ impl<'a> Algorithm<'a> { Ok(()) } - fn mark_common(&mut self, id: ObjectId, mode: Mark, ancestors: Ancestors) -> Result<(), Error> { + fn mark_common( + &mut self, + id: ObjectId, + mode: Mark, + ancestors: Ancestors, + graph: &mut crate::Graph<'_>, + ) -> Result<(), Error> { let mut is_common = false; - if let Some(commit) = self - .graph - .try_lookup_and_insert(id, |current| is_common = current.contains(Flags::COMMON))? + if let Some(commit) = graph + .try_lookup_or_insert_commit(id, |data| is_common = data.flags.contains(Flags::COMMON))? .filter(|_| !is_common) { - let mut queue = - gix_revision::PriorityQueue::from_iter(Some((commit.committer_timestamp()?, (id, 0_usize)))); + let mut queue = gix_revision::PriorityQueue::from_iter(Some((commit.commit_time, (id, 0_usize)))); if let Mark::ThisCommitAndAncestors = mode { - let current = self.graph.get_mut(&id).expect("just inserted"); - *current |= Flags::COMMON; - if current.contains(Flags::SEEN) && !current.contains(Flags::POPPED) { + commit.data.flags |= Flags::COMMON; + if commit.data.flags.contains(Flags::SEEN) && !commit.data.flags.contains(Flags::POPPED) { self.non_common_revs -= 1; } } - let mut parents = SmallVec::new(); while let Some((id, generation)) = queue.pop() { - if self.graph.get(&id).map_or(true, |d| !d.contains(Flags::SEEN)) { - self.add_to_queue(id, Flags::SEEN)?; + if graph + .get(&id) + .map_or(true, |commit| !commit.data.flags.contains(Flags::SEEN)) + { + self.add_to_queue(id, Flags::SEEN, graph)?; } else if matches!(ancestors, Ancestors::AllUnseen) || generation < 2 { - if let Some(commit) = self.graph.try_lookup_and_insert(id, |_| {})? { - collect_parents(commit.iter_parents(), &mut parents)?; - for parent_id in parents.drain(..) { + if let Some(commit) = graph.try_lookup_or_insert_commit(id, |_| {})? { + for parent_id in commit.parents.clone() { let mut prev_flags = Flags::default(); - if let Some(parent) = self - .graph - .try_lookup_and_insert(parent_id, |d| { - prev_flags = *d; - *d |= Flags::COMMON; + if let Some(parent) = graph + .try_lookup_or_insert_commit(parent_id, |data| { + prev_flags = data.flags; + data.flags |= Flags::COMMON; })? 
.filter(|_| !prev_flags.contains(Flags::COMMON)) { if prev_flags.contains(Flags::SEEN) && !prev_flags.contains(Flags::POPPED) { self.non_common_revs -= 1; } - queue.insert(parent.committer_timestamp()?, (parent_id, generation + 1)) + queue.insert(parent.commit_time, (parent_id, generation + 1)) } } } @@ -98,38 +87,27 @@ impl<'a> Algorithm<'a> { } } -pub(crate) fn collect_parents( - parents: gix_revision::graph::commit::Parents<'_>, - out: &mut SmallVec<[ObjectId; 2]>, -) -> Result<(), Error> { - out.clear(); - for parent in parents { - out.push(parent.map_err(|err| match err { - gix_revision::graph::commit::iter_parents::Error::DecodeCommit(err) => Error::DecodeCommit(err), - gix_revision::graph::commit::iter_parents::Error::DecodeCommitGraph(err) => Error::DecodeCommitInGraph(err), - })?); - } - Ok(()) -} - -impl<'a> Negotiator for Algorithm<'a> { - fn known_common(&mut self, id: ObjectId) -> Result<(), Error> { - if self.graph.get(&id).map_or(true, |d| !d.contains(Flags::SEEN)) { - self.add_to_queue(id, Flags::COMMON_REF | Flags::SEEN)?; - self.mark_common(id, Mark::AncestorsOnly, Ancestors::DirectUnseen)?; +impl Negotiator for Algorithm { + fn known_common(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result<(), Error> { + if graph + .get(&id) + .map_or(true, |commit| !commit.data.flags.contains(Flags::SEEN)) + { + self.add_to_queue(id, Flags::COMMON_REF | Flags::SEEN, graph)?; + self.mark_common(id, Mark::AncestorsOnly, Ancestors::DirectUnseen, graph)?; } Ok(()) } - fn add_tip(&mut self, id: ObjectId) -> Result<(), Error> { - self.add_to_queue(id, Flags::SEEN) + fn add_tip(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result<(), Error> { + self.add_to_queue(id, Flags::SEEN, graph) } - fn next_have(&mut self) -> Option> { - let mut parents = SmallVec::new(); + fn next_have(&mut self, graph: &mut crate::Graph<'_>) -> Option> { loop { let id = self.revs.pop().filter(|_| self.non_common_revs != 0)?; - let flags = self.graph.get_mut(&id).expect("it was added to the graph by now"); + let commit = graph.get_mut(&id).expect("it was added to the graph by now"); + let flags = &mut commit.data.flags; *flags |= Flags::POPPED; if !flags.contains(Flags::COMMON) { @@ -144,21 +122,17 @@ impl<'a> Negotiator for Algorithm<'a> { (Some(id), Flags::SEEN) }; - let commit = match self.graph.try_lookup(&id) { - Ok(c) => c.expect("it was found before, must still be there"), - Err(err) => return Some(Err(err.into())), - }; - if let Err(err) = collect_parents(commit.iter_parents(), &mut parents) { - return Some(Err(err)); - } - for parent_id in parents.drain(..) 
{ - if self.graph.get(&parent_id).map_or(true, |d| !d.contains(Flags::SEEN)) { - if let Err(err) = self.add_to_queue(parent_id, mark) { + for parent_id in commit.parents.clone() { + if graph + .get(&parent_id) + .map_or(true, |commit| !commit.data.flags.contains(Flags::SEEN)) + { + if let Err(err) = self.add_to_queue(parent_id, mark, graph) { return Some(Err(err)); } } if mark.contains(Flags::COMMON) { - if let Err(err) = self.mark_common(parent_id, Mark::AncestorsOnly, Ancestors::AllUnseen) { + if let Err(err) = self.mark_common(parent_id, Mark::AncestorsOnly, Ancestors::AllUnseen, graph) { return Some(Err(err)); } } @@ -170,9 +144,11 @@ impl<'a> Negotiator for Algorithm<'a> { } } - fn in_common_with_remote(&mut self, id: ObjectId) -> Result { - let known_to_be_common = self.graph.get(&id).map_or(false, |d| d.contains(Flags::COMMON)); - self.mark_common(id, Mark::ThisCommitAndAncestors, Ancestors::DirectUnseen)?; + fn in_common_with_remote(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result { + let known_to_be_common = graph + .get(&id) + .map_or(false, |commit| commit.data.flags.contains(Flags::COMMON)); + self.mark_common(id, Mark::ThisCommitAndAncestors, Ancestors::DirectUnseen, graph)?; Ok(known_to_be_common) } } diff --git a/gix-negotiate/src/lib.rs b/gix-negotiate/src/lib.rs index c207b6861a..b05f944f88 100644 --- a/gix-negotiate/src/lib.rs +++ b/gix-negotiate/src/lib.rs @@ -2,11 +2,64 @@ //! the pack it sends to only contain what we don't have. #![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] - mod consecutive; mod noop; mod skipping; +bitflags::bitflags! { + /// Multi purpose, shared flags that are used by negotiation algorithms and by the caller as well + /// + /// However, in this crate we can't implement the calling side, so we marry this type to whatever is needed in downstream crates. + #[derive(Debug, Default, Copy, Clone)] + pub struct Flags: u8 { + /// The object is already available locally and doesn't need to be fetched by the remote. + const COMPLETE = 1 << 0; + /// A commit from an alternate object database. + const ALTERNATE = 1 << 1; + + /// The revision is known to be in common with the remote. + /// + /// Used by `consecutive` and `skipping`. + const COMMON = 1 << 2; + /// The revision has entered the priority queue. + /// + /// Used by `consecutive` and `skipping`. + const SEEN = 1 << 3; + /// The revision was popped off our primary priority queue, used to avoid double-counting of `non_common_revs` + /// + /// Used by `consecutive` and `skipping`. + const POPPED = 1 << 4; + + /// The revision is common and was set by merit of a remote tracking ref (e.g. `refs/heads/origin/main`). + /// + /// Used by `consecutive`. + const COMMON_REF = 1 << 5; + + /// The remote let us know it has the object. We still have to tell the server we have this object or one of its descendants. + /// We won't tell the server about its ancestors. + /// + /// Used by `skipping`. + const ADVERTISED = 1 << 6; + } +} + +/// Additional data to store with each commit when used by any of our algorithms. +/// +/// It's shared among those who use the [`Negotiator`] trait, and all implementations of it. +#[derive(Default, Copy, Clone)] +pub struct Metadata { + /// Used by `skipping`. + /// Only used if commit is not COMMON + pub original_ttl: u16, + /// Used by `skipping`. + pub ttl: u16, + /// Additional information about each commit + pub flags: Flags, +} + +/// The graph our callers use to store traversal information, for (re-)use in the negotiation implementation. 
+pub type Graph<'find> = gix_revision::Graph<'find, gix_revision::graph::Commit>; + /// The way the negotiation is performed. #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] pub enum Algorithm { @@ -19,6 +72,17 @@ pub enum Algorithm { Skipping, } +impl std::fmt::Display for Algorithm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Algorithm::Noop => "noop", + Algorithm::Consecutive => "consecutive", + Algorithm::Skipping => "skipping", + } + .fmt(f) + } +} + /// Calculate how many `HAVE` lines we may send in one round, with variation depending on whether the `transport_is_stateless` or not. /// `window_size` is the previous (or initial) value of the window size. pub fn window_size(transport_is_stateless: bool, window_size: impl Into>) -> usize { @@ -44,26 +108,11 @@ pub fn window_size(transport_is_stateless: bool, window_size: impl Into( - self, - find: Find, - cache: impl Into>, - ) -> Box - where - Find: - for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result>, E> + 'find, - E: std::error::Error + Send + Sync + 'static, - { + pub fn into_negotiator(self) -> Box { match &self { Algorithm::Noop => Box::new(noop::Noop) as Box, - Algorithm::Consecutive => { - let graph = gix_revision::Graph::<'_, consecutive::Flags>::new(find, cache); - Box::new(consecutive::Algorithm::new(graph)) - } - Algorithm::Skipping => { - let graph = gix_revision::Graph::<'_, skipping::Entry>::new(find, cache); - Box::new(skipping::Algorithm::new(graph)) - } + Algorithm::Consecutive => Box::::default(), + Algorithm::Skipping => Box::::default(), } } } @@ -73,32 +122,23 @@ pub trait Negotiator { /// Mark `id` as common between the remote and us. /// /// These ids are typically the local tips of remote tracking branches. - fn known_common(&mut self, id: gix_hash::ObjectId) -> Result<(), Error>; + fn known_common(&mut self, id: gix_hash::ObjectId, graph: &mut Graph<'_>) -> Result<(), Error>; /// Add `id` as starting point of a traversal across commits that aren't necessarily common between the remote and us. /// /// These tips are usually the commits of local references whose tips should lead to objects that we have in common with the remote. - fn add_tip(&mut self, id: gix_hash::ObjectId) -> Result<(), Error>; + fn add_tip(&mut self, id: gix_hash::ObjectId, graph: &mut Graph<'_>) -> Result<(), Error>; /// Produce the next id of an object that we want the server to know we have. It's an object we don't know we have in common or not. /// /// Returns `None` if we have exhausted all options, which might mean we have traversed the entire commit graph. - fn next_have(&mut self) -> Option>; + fn next_have(&mut self, graph: &mut Graph<'_>) -> Option>; /// Mark `id` as being common with the remote (as informed by the remote itself) and return `true` if we knew it was common already. /// /// We can assume to have already seen `id` as we were the one to inform the remote in a prior `have`. - fn in_common_with_remote(&mut self, id: gix_hash::ObjectId) -> Result; + fn in_common_with_remote(&mut self, id: gix_hash::ObjectId, graph: &mut Graph<'_>) -> Result; } /// An error that happened during any of the methods on a [`Negotiator`]. 
-#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error(transparent)] - DecodeCommit(#[from] gix_object::decode::Error), - #[error(transparent)] - DecodeCommitInGraph(#[from] gix_commitgraph::file::commit::Error), - #[error(transparent)] - LookupCommitInGraph(#[from] gix_revision::graph::lookup::Error), -} +pub type Error = gix_revision::graph::lookup::commit::Error; diff --git a/gix-negotiate/src/noop.rs b/gix-negotiate/src/noop.rs index 92ad101860..d5ebf5c0e8 100644 --- a/gix-negotiate/src/noop.rs +++ b/gix-negotiate/src/noop.rs @@ -4,19 +4,19 @@ use gix_hash::ObjectId; pub(crate) struct Noop; impl Negotiator for Noop { - fn known_common(&mut self, _id: ObjectId) -> Result<(), Error> { + fn known_common(&mut self, _id: ObjectId, _graph: &mut crate::Graph<'_>) -> Result<(), Error> { Ok(()) } - fn add_tip(&mut self, _id: ObjectId) -> Result<(), Error> { + fn add_tip(&mut self, _id: ObjectId, _graph: &mut crate::Graph<'_>) -> Result<(), Error> { Ok(()) } - fn next_have(&mut self) -> Option> { + fn next_have(&mut self, _graph: &mut crate::Graph<'_>) -> Option> { None } - fn in_common_with_remote(&mut self, _id: ObjectId) -> Result { + fn in_common_with_remote(&mut self, _id: ObjectId, _graph: &mut crate::Graph<'_>) -> Result { Ok(false) } } diff --git a/gix-negotiate/src/skipping.rs b/gix-negotiate/src/skipping.rs index 719e3cf9a5..a12da31e24 100644 --- a/gix-negotiate/src/skipping.rs +++ b/gix-negotiate/src/skipping.rs @@ -1,53 +1,28 @@ -use crate::consecutive::collect_parents; -use crate::{Error, Negotiator}; +use crate::{Error, Flags, Metadata, Negotiator}; use gix_hash::ObjectId; use gix_revision::graph::CommitterTimestamp; -use smallvec::SmallVec; -bitflags::bitflags! { - /// Whether something can be read or written. - #[derive(Debug, Default, Copy, Clone)] - pub struct Flags: u8 { - /// The revision is known to be in common with the remote. - const COMMON = 1 << 0; - /// The remote let us know it has the object. We still have to tell the server we have this object or one of its descendants. - /// We won't tell the server about its ancestors. - const ADVERTISED = 1 << 1; - /// The revision has at one point entered the priority queue (even though it might not be on it anymore). - const SEEN = 1 << 2; - /// The revision was popped off our priority queue. - const POPPED = 1 << 3; - } -} -#[derive(Default, Copy, Clone)] -pub(crate) struct Entry { - flags: Flags, - /// Only used if commit is not COMMON - original_ttl: u16, - ttl: u16, -} - -pub(crate) struct Algorithm<'find> { - graph: gix_revision::Graph<'find, Entry>, +pub(crate) struct Algorithm { revs: gix_revision::PriorityQueue, non_common_revs: usize, } -impl<'a> Algorithm<'a> { - pub fn new(graph: gix_revision::Graph<'a, Entry>) -> Self { +impl Default for Algorithm { + fn default() -> Self { Self { - graph, revs: gix_revision::PriorityQueue::new(), non_common_revs: 0, } } +} +impl Algorithm { /// Add `id` to our priority queue and *add* `flags` to it. - fn add_to_queue(&mut self, id: ObjectId, mark: Flags) -> Result<(), Error> { - let commit = self.graph.try_lookup_and_insert(id, |entry| { + fn add_to_queue(&mut self, id: ObjectId, mark: Flags, graph: &mut crate::Graph<'_>) -> Result<(), Error> { + let commit = graph.try_lookup_or_insert_commit(id, |entry| { entry.flags |= mark | Flags::SEEN; })?; - if let Some(timestamp) = commit.map(|c| c.committer_timestamp()).transpose()? 
{ + if let Some(timestamp) = commit.map(|c| c.commit_time) { self.revs.insert(timestamp, id); if !mark.contains(Flags::COMMON) { self.non_common_revs += 1; @@ -56,47 +31,38 @@ impl<'a> Algorithm<'a> { Ok(()) } - fn mark_common(&mut self, id: ObjectId) -> Result<(), Error> { + fn mark_common(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result<(), Error> { let mut is_common = false; - if let Some(commit) = self - .graph - .try_lookup_and_insert(id, |entry| { + if let Some(commit) = graph + .try_lookup_or_insert_commit(id, |entry| { is_common = entry.flags.contains(Flags::COMMON); entry.flags |= Flags::COMMON; })? .filter(|_| !is_common) { - let mut queue = gix_revision::PriorityQueue::from_iter(Some((commit.committer_timestamp()?, id))); - let mut parents = SmallVec::new(); + let mut queue = gix_revision::PriorityQueue::from_iter(Some((commit.commit_time, id))); while let Some(id) = queue.pop() { - // This is a bit of a problem as there is no representation of the `parsed` based skip, which probably - // prevents this traversal from going on for too long. There is no equivalent here, but when artificially - // limiting the traversal depth the tests fail as they actually require the traversal to happen. - if self - .graph - .get(&id) - .map_or(false, |entry| entry.flags.contains(Flags::POPPED)) - { - self.non_common_revs = self.non_common_revs.saturating_sub(1); - } - if let Some(commit) = self.graph.try_lookup_and_insert(id, |entry| { + if let Some(commit) = graph.try_lookup_or_insert_commit(id, |entry| { if !entry.flags.contains(Flags::POPPED) { self.non_common_revs -= 1; } })? { - collect_parents(commit.iter_parents(), &mut parents)?; - for parent_id in parents.drain(..) { + for parent_id in commit.parents.clone() { + // This is a bit of a problem as there is no representation of the `parsed` based skip. However, + // We assume that parents that aren't in the graph yet haven't been seen, and that's all we need. + if !graph.contains(&parent_id) { + continue; + } let mut was_unseen_or_common = false; - if let Some(parent) = self - .graph - .try_lookup_and_insert(parent_id, |entry| { + if let Some(parent) = graph + .try_lookup_or_insert_commit(parent_id, |entry| { was_unseen_or_common = !entry.flags.contains(Flags::SEEN) || entry.flags.contains(Flags::COMMON); entry.flags |= Flags::COMMON })? 
.filter(|_| !was_unseen_or_common) { - queue.insert(parent.committer_timestamp()?, parent_id); + queue.insert(parent.commit_time, parent_id); } } } @@ -105,25 +71,29 @@ impl<'a> Algorithm<'a> { Ok(()) } - fn push_parent(&mut self, entry: Entry, parent_id: ObjectId) -> Result { + fn push_parent( + &mut self, + entry: Metadata, + parent_id: ObjectId, + graph: &mut crate::Graph<'_>, + ) -> Result { let mut was_seen = false; - if let Some(parent_entry) = self - .graph + if let Some(parent) = graph .get(&parent_id) - .map(|entry| { - was_seen = entry.flags.contains(Flags::SEEN); - entry + .map(|parent| { + was_seen = parent.data.flags.contains(Flags::SEEN); + parent }) .filter(|_| was_seen) { - if parent_entry.flags.contains(Flags::POPPED) { + if parent.data.flags.contains(Flags::POPPED) { return Ok(false); } } else { - self.add_to_queue(parent_id, Flags::default())?; + self.add_to_queue(parent_id, Flags::default(), graph)?; } if entry.flags.intersects(Flags::COMMON | Flags::ADVERTISED) { - self.mark_common(parent_id)?; + self.mark_common(parent_id, graph)?; } else { let new_original_ttl = if entry.ttl > 0 { entry.original_ttl @@ -131,71 +101,61 @@ impl<'a> Algorithm<'a> { entry.original_ttl * 3 / 2 + 1 }; let new_ttl = if entry.ttl > 0 { entry.ttl - 1 } else { new_original_ttl }; - let parent_entry = self.graph.get_mut(&parent_id).expect("present or inserted"); - if parent_entry.original_ttl < new_original_ttl { - parent_entry.original_ttl = new_original_ttl; - parent_entry.ttl = new_ttl; + let parent = graph.get_mut(&parent_id).expect("present or inserted"); + if parent.data.original_ttl < new_original_ttl { + parent.data.original_ttl = new_original_ttl; + parent.data.ttl = new_ttl; } } Ok(true) } } -impl<'a> Negotiator for Algorithm<'a> { - fn known_common(&mut self, id: ObjectId) -> Result<(), Error> { - if self - .graph +impl Negotiator for Algorithm { + fn known_common(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result<(), Error> { + if graph .get(&id) - .map_or(false, |entry| entry.flags.contains(Flags::SEEN)) + .map_or(false, |commit| commit.data.flags.contains(Flags::SEEN)) { return Ok(()); } - self.add_to_queue(id, Flags::ADVERTISED) + self.add_to_queue(id, Flags::ADVERTISED, graph) } - fn add_tip(&mut self, id: ObjectId) -> Result<(), Error> { - if self - .graph + fn add_tip(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result<(), Error> { + if graph .get(&id) - .map_or(false, |entry| entry.flags.contains(Flags::SEEN)) + .map_or(false, |commit| commit.data.flags.contains(Flags::SEEN)) { return Ok(()); } - self.add_to_queue(id, Flags::default()) + self.add_to_queue(id, Flags::default(), graph) } - fn next_have(&mut self) -> Option> { - let mut parents = SmallVec::new(); + fn next_have(&mut self, graph: &mut crate::Graph<'_>) -> Option> { loop { let id = self.revs.pop().filter(|_| self.non_common_revs != 0)?; - let entry = self.graph.get_mut(&id).expect("it was added to the graph by now"); - entry.flags |= Flags::POPPED; + let commit = graph.get_mut(&id).expect("it was added to the graph by now"); + commit.data.flags |= Flags::POPPED; - if !entry.flags.contains(Flags::COMMON) { + if !commit.data.flags.contains(Flags::COMMON) { self.non_common_revs -= 1; } let mut to_send = None; - if !entry.flags.contains(Flags::COMMON) && entry.ttl == 0 { + if !commit.data.flags.contains(Flags::COMMON) && commit.data.ttl == 0 { to_send = Some(id); } - let entry = *entry; - let commit = match self.graph.try_lookup(&id) { - Ok(c) => c.expect("it was found before, must still be 
there"), - Err(err) => return Some(Err(err.into())), - }; - if let Err(err) = collect_parents(commit.iter_parents(), &mut parents) { - return Some(Err(err)); - } + let data = commit.data; let mut parent_pushed = false; - for parent_id in parents.drain(..) { - parent_pushed |= match self.push_parent(entry, parent_id) { + for parent_id in commit.parents.clone() { + parent_pushed |= match self.push_parent(data, parent_id, graph) { Ok(r) => r, Err(err) => return Some(Err(err)), } } - if !entry.flags.contains(Flags::COMMON) && !parent_pushed { + if !data.flags.contains(Flags::COMMON) && !parent_pushed { to_send = Some(id); } @@ -205,17 +165,17 @@ impl<'a> Negotiator for Algorithm<'a> { } } - fn in_common_with_remote(&mut self, id: ObjectId) -> Result { + fn in_common_with_remote(&mut self, id: ObjectId, graph: &mut crate::Graph<'_>) -> Result { let mut was_seen = false; - let known_to_be_common = self.graph.get(&id).map_or(false, |entry| { - was_seen = entry.flags.contains(Flags::SEEN); - entry.flags.contains(Flags::COMMON) + let known_to_be_common = graph.get(&id).map_or(false, |commit| { + was_seen = commit.data.flags.contains(Flags::SEEN); + commit.data.flags.contains(Flags::COMMON) }); assert!( was_seen, "Cannot receive ACK for commit we didn't send a HAVE for: {id}" ); - self.mark_common(id)?; + self.mark_common(id, graph)?; Ok(known_to_be_common) } } diff --git a/gix-negotiate/tests/baseline/mod.rs b/gix-negotiate/tests/baseline/mod.rs index fd3063aa23..abfc845d04 100644 --- a/gix-negotiate/tests/baseline/mod.rs +++ b/gix-negotiate/tests/baseline/mod.rs @@ -56,11 +56,12 @@ fn run() -> crate::Result { .to_owned() }; + let debug = false; for use_cache in [false, true] { let cache = use_cache .then(|| gix_commitgraph::at(store.store_ref().path().join("info")).ok()) .flatten(); - let mut negotiator = algo.into_negotiator( + let mut graph = gix_revision::Graph::new( |id, buf| { store .try_find(id, buf) @@ -68,7 +69,10 @@ fn run() -> crate::Result { }, cache, ); - eprintln!("ALGO {algo_name} CASE {case}"); + let mut negotiator = algo.into_negotiator(); + if debug { + eprintln!("ALGO {algo_name} CASE {case}"); + } // // In --negotiate-only mode, which seems to be the only thing that's working after trying --dry-run, we unfortunately // // don't get to see what happens if known-common commits are added as git itself doesn't do that in this mode // // for some reason. 
@@ -82,8 +86,10 @@ fn run() -> crate::Result { .filter_map(Result::ok) .map(|r| r.target.into_id()), ) { - eprintln!("TIP {name} {tip}", name = message(tip)); - negotiator.add_tip(tip)?; + if debug { + eprintln!("TIP {name} {tip}", name = message(tip)); + } + negotiator.add_tip(tip, &mut graph)?; } for (round, Round { mut haves, common }) in ParseRounds::new(buf.lines()).enumerate() { if algo == Algorithm::Skipping { @@ -116,26 +122,30 @@ fn run() -> crate::Result { } } for have in haves { - let actual = negotiator.next_have().unwrap_or_else(|| { + let actual = negotiator.next_have(&mut graph).unwrap_or_else(|| { panic!("{algo_name}:cache={use_cache}: one have per baseline: {have} missing or in wrong order", have = message(have)) })?; assert_eq!( actual, have, "{algo_name}:cache={use_cache}: order and commit matches exactly, wanted {expected}, got {actual}, commits left: {:?}", - std::iter::from_fn(|| negotiator.next_have()).map(|id| message(id.unwrap())).collect::>(), + std::iter::from_fn(|| negotiator.next_have(&mut graph)).map(|id| message(id.unwrap())).collect::>(), actual = message(actual), expected = message(have) ); - eprintln!("have {}", message(actual)); + if debug { + eprintln!("have {}", message(actual)); + } } for common_revision in common { - eprintln!("ACK {}", message(common_revision)); - negotiator.in_common_with_remote(common_revision)?; + if debug { + eprintln!("ACK {}", message(common_revision)); + } + negotiator.in_common_with_remote(common_revision, &mut graph)?; } } assert!( - negotiator.next_have().is_none(), + negotiator.next_have(&mut graph).is_none(), "{algo_name}:cache={use_cache}: negotiator should be depleted after all recorded baseline rounds" ); } diff --git a/gix-negotiate/tests/negotiate.rs b/gix-negotiate/tests/negotiate.rs index 4e6a42981f..357b54d615 100644 --- a/gix-negotiate/tests/negotiate.rs +++ b/gix-negotiate/tests/negotiate.rs @@ -35,3 +35,12 @@ mod window_size { } mod baseline; + +#[test] +fn size_of_entry() { + assert_eq!( + std::mem::size_of::>(), + 56, + "we may keep a lot of these, so let's not let them grow unnoticed" + ); +} diff --git a/gix-revision/src/graph/errors.rs b/gix-revision/src/graph/errors.rs index cd3e926118..03bae2b032 100644 --- a/gix-revision/src/graph/errors.rs +++ b/gix-revision/src/graph/errors.rs @@ -10,7 +10,7 @@ pub mod lookup { /// pub mod commit { - /// The error returned by [`try_lookup_commit()`][crate::Graph::try_lookup_commit()]. + /// The error returned by [`try_lookup_or_insert_commit()`][crate::Graph::try_lookup_or_insert_commit()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { diff --git a/gix-revision/src/lib.rs b/gix-revision/src/lib.rs index 463580e63a..dcc92cd652 100644 --- a/gix-revision/src/lib.rs +++ b/gix-revision/src/lib.rs @@ -35,7 +35,8 @@ pub use spec::types::Spec; /// Most usage of the Graph will benefit from fast ODB lookups, so setting up an object cache will be beneficial. If that's not the case, /// the method docs will inform about that. /// -/// Additionally, there is *no need for an object cache* as we keep track of everything we traverse in our own hashmap. +/// Additionally, and only if `T` is [`Commit`][graph::Commit], there is *no need for an object cache* as we keep track of +/// everything related to commit traversal in our own hashmap. pub struct Graph<'find, T> { /// A way to resolve a commit from the object database. 
find: Box>, From 8c72a236dbeb71a4aead45bf82010f1c89829540 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Jun 2023 17:46:49 +0200 Subject: [PATCH 08/16] feat: add `Store::alternate_db_paths()`. Provides a way to learn about loose database paths that are provided by git alternates. --- gix-odb/src/store_impls/dynamic/mod.rs | 100 +-------------- gix-odb/src/store_impls/dynamic/structure.rs | 117 ++++++++++++++++++ .../fixtures/generated-archives/.gitignore | 1 + gix-odb/tests/fixtures/make_alternates_odb.sh | 21 ++++ gix-odb/tests/odb/store/dynamic.rs | 25 ++++ 5 files changed, 165 insertions(+), 99 deletions(-) create mode 100644 gix-odb/src/store_impls/dynamic/structure.rs create mode 100644 gix-odb/tests/fixtures/make_alternates_odb.sh diff --git a/gix-odb/src/store_impls/dynamic/mod.rs b/gix-odb/src/store_impls/dynamic/mod.rs index 774bb61dc8..e992fada6f 100644 --- a/gix-odb/src/store_impls/dynamic/mod.rs +++ b/gix-odb/src/store_impls/dynamic/mod.rs @@ -82,102 +82,4 @@ mod metrics; mod access; /// -pub mod structure { - use std::path::PathBuf; - - use crate::{store::load_index, types::IndexAndPacks, Store}; - - /// A record of a structural element of an object database. - #[derive(Debug, Clone, PartialEq, Eq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - pub enum Record { - /// A loose object database. - LooseObjectDatabase { - /// The root of the object database. - objects_directory: PathBuf, - /// The amount of object files. - num_objects: usize, - }, - /// A pack index file - Index { - /// The location of the index file, - path: PathBuf, - /// Whether or not the index is mapped into memory. - state: IndexState, - }, - /// A multi-index file - MultiIndex { - /// The location of the multi-index file, - path: PathBuf, - /// Whether or not the index is mapped into memory. - state: IndexState, - }, - /// An empty slot was encountered, this is possibly happening as the ODB changes during query with - /// a file being removed. - Empty, - } - - #[derive(Debug, Clone, PartialEq, Eq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - /// Possible stats of pack indices. - pub enum IndexState { - /// The index is active in memory because a mapping exists. - Loaded, - /// The index couldn't be unloaded as it was still in use, but that can happen another time. - Disposable, - /// The index isn't loaded/memory mapped. - Unloaded, - } - - impl Store { - /// Return information about all files known to us as well as their loading state. - /// - /// Note that this call is expensive as it gathers additional information about loose object databases. - /// Note that it may change as we collect information due to the highly volatile nature of the - /// implementation. The likelihood of actual changes is low though as these still depend on something - /// changing on disk and somebody reading at the same time. 
- pub fn structure(&self) -> Result, load_index::Error> { - let index = self.index.load(); - if !index.is_initialized() { - self.consolidate_with_disk_state(true, false /*load one new index*/)?; - } - let index = self.index.load(); - let mut res: Vec<_> = index - .loose_dbs - .iter() - .map(|db| Record::LooseObjectDatabase { - objects_directory: db.path.clone(), - num_objects: db.iter().count(), - }) - .collect(); - - for slot in index.slot_indices.iter().map(|idx| &self.files[*idx]) { - let files = slot.files.load(); - let record = match &**files { - Some(index) => { - let state = if index.is_disposable() { - IndexState::Disposable - } else if index.index_is_loaded() { - IndexState::Loaded - } else { - IndexState::Unloaded - }; - match index { - IndexAndPacks::Index(b) => Record::Index { - path: b.index.path().into(), - state, - }, - IndexAndPacks::MultiIndex(b) => Record::MultiIndex { - path: b.multi_index.path().into(), - state, - }, - } - } - None => Record::Empty, - }; - res.push(record); - } - Ok(res) - } - } -} +pub mod structure; diff --git a/gix-odb/src/store_impls/dynamic/structure.rs b/gix-odb/src/store_impls/dynamic/structure.rs new file mode 100644 index 0000000000..687e74d6a5 --- /dev/null +++ b/gix-odb/src/store_impls/dynamic/structure.rs @@ -0,0 +1,117 @@ +use std::path::PathBuf; + +use crate::{store::load_index, types::IndexAndPacks, Store}; + +/// A record of a structural element of an object database. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Record { + /// A loose object database. + LooseObjectDatabase { + /// The root of the object database. + objects_directory: PathBuf, + /// The amount of object files. + num_objects: usize, + }, + /// A pack index file + Index { + /// The location of the index file, + path: PathBuf, + /// Whether or not the index is mapped into memory. + state: IndexState, + }, + /// A multi-index file + MultiIndex { + /// The location of the multi-index file, + path: PathBuf, + /// Whether or not the index is mapped into memory. + state: IndexState, + }, + /// An empty slot was encountered, this is possibly happening as the ODB changes during query with + /// a file being removed. + Empty, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +/// Possible stats of pack indices. +pub enum IndexState { + /// The index is active in memory because a mapping exists. + Loaded, + /// The index couldn't be unloaded as it was still in use, but that can happen another time. + Disposable, + /// The index isn't loaded/memory mapped. + Unloaded, +} + +impl Store { + /// Return information about all files known to us as well as their loading state. + /// + /// Note that this call is expensive as it gathers additional information about loose object databases. + /// Note that it may change as we collect information due to the highly volatile nature of the + /// implementation. The likelihood of actual changes is low though as these still depend on something + /// changing on disk and somebody reading at the same time. 
+ pub fn structure(&self) -> Result, load_index::Error> { + let index = self.index.load(); + if !index.is_initialized() { + self.consolidate_with_disk_state(true, false /*load one new index*/)?; + } + let index = self.index.load(); + let mut res: Vec<_> = index + .loose_dbs + .iter() + .map(|db| Record::LooseObjectDatabase { + objects_directory: db.path.clone(), + num_objects: db.iter().count(), + }) + .collect(); + + for slot in index.slot_indices.iter().map(|idx| &self.files[*idx]) { + let files = slot.files.load(); + let record = match &**files { + Some(index) => { + let state = if index.is_disposable() { + IndexState::Disposable + } else if index.index_is_loaded() { + IndexState::Loaded + } else { + IndexState::Unloaded + }; + match index { + IndexAndPacks::Index(b) => Record::Index { + path: b.index.path().into(), + state, + }, + IndexAndPacks::MultiIndex(b) => Record::MultiIndex { + path: b.multi_index.path().into(), + state, + }, + } + } + None => Record::Empty, + }; + res.push(record); + } + Ok(res) + } + + /// Provide a list of all `objects` directories of `alternate` object database paths. + /// This list might be empty if there are no alternates. + /// + /// Read more about alternates in the documentation of the [`resolve`][crate::alternate::resolve()] function. + pub fn alternate_db_paths(&self) -> Result, load_index::Error> { + let index = self.index.load(); + if !index.is_initialized() { + self.consolidate_with_disk_state(true, false /*load one new index*/)?; + } + let index = self.index.load(); + Ok(index + .loose_dbs + .iter() + .skip( + 1, /* first odb is always the primary one, all the follows is alternates */ + ) + .map(|db| db.path.clone()) + .collect()) + } +} diff --git a/gix-odb/tests/fixtures/generated-archives/.gitignore b/gix-odb/tests/fixtures/generated-archives/.gitignore index f6a0d3bb31..4d1ab0373b 100644 --- a/gix-odb/tests/fixtures/generated-archives/.gitignore +++ b/gix-odb/tests/fixtures/generated-archives/.gitignore @@ -1 +1,2 @@ repo_with_loose_objects.tar.xz +make_alternates_odb.tar.xz diff --git a/gix-odb/tests/fixtures/make_alternates_odb.sh b/gix-odb/tests/fixtures/make_alternates_odb.sh new file mode 100644 index 0000000000..03b9964383 --- /dev/null +++ b/gix-odb/tests/fixtures/make_alternates_odb.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +git checkout -b main +touch this +git add this +git commit -q -m c1 +echo hello >> this +git commit -q -am c2 + +(git init object_source && cd object_source + git checkout -b main + touch this + git add this + git commit -q -m alternate-c1 +) + +echo $PWD/object_source/.git/objects > .git/objects/info/alternates + diff --git a/gix-odb/tests/odb/store/dynamic.rs b/gix-odb/tests/odb/store/dynamic.rs index 53984f14fe..f2e27a477b 100644 --- a/gix-odb/tests/odb/store/dynamic.rs +++ b/gix-odb/tests/odb/store/dynamic.rs @@ -135,6 +135,11 @@ fn multi_index_access() -> crate::Result { ); assert_eq!(handle.store_ref().structure()?.len(), 2); + assert_eq!( + handle.store_ref().alternate_db_paths()?.len(), + 0, + "there are no alternates" + ); Ok(()) } @@ -215,6 +220,26 @@ fn write() -> crate::Result { Ok(()) } +#[test] +fn alternate_dbs_query() -> crate::Result { + let dir = gix_testtools::scripted_fixture_read_only("make_alternates_odb.sh")?; + let handle = gix_odb::at(dir.join(".git/objects"))?; + + let alternates = handle.store_ref().alternate_db_paths()?; + assert_eq!(alternates.len(), 1, "exactly one alternate"); + assert_eq!( + alternates[0] + .ancestors() + .nth(2) + .expect("alternate 
repo work-dir") + .file_name() + .expect("present"), + "object_source", + "the alternate object location is well-known" + ); + Ok(()) +} + #[test] fn object_replacement() -> crate::Result { let dir = gix_testtools::scripted_fixture_read_only("make_replaced_history.sh")?; From aa2d0643a212a7a619890f3650c7d7005f8f8fd0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Jun 2023 20:44:06 +0200 Subject: [PATCH 09/16] fix!: parse 'handshake' of `file://` based protocol version 0. This special protocol kicks in when `git` serves `file://` directly and no version number is specified. Then it doesn't advertise capabilities at all, but shows 0000 right away. Make sure we can parse it, and show it by adding `Version::V0` as well. --- .../src/client/async_io/bufread_ext.rs | 2 +- .../src/client/blocking_io/bufread_ext.rs | 2 +- gix-transport/src/client/capabilities.rs | 175 ++++++++++-------- gix-transport/src/lib.rs | 2 + gix-transport/tests/client/capabilities.rs | 15 ++ gix-transport/tests/client/mod.rs | 1 - 6 files changed, 120 insertions(+), 77 deletions(-) diff --git a/gix-transport/src/client/async_io/bufread_ext.rs b/gix-transport/src/client/async_io/bufread_ext.rs index 2b4362fc91..f3eb2a67ad 100644 --- a/gix-transport/src/client/async_io/bufread_ext.rs +++ b/gix-transport/src/client/async_io/bufread_ext.rs @@ -125,7 +125,7 @@ impl<'a, T: AsyncRead + Unpin> ExtendedBufRead for gix_packetline::read::WithSid } fn reset(&mut self, version: Protocol) { match version { - Protocol::V1 => self.reset_with(&[gix_packetline::PacketLineRef::Flush]), + Protocol::V0 | Protocol::V1 => self.reset_with(&[gix_packetline::PacketLineRef::Flush]), Protocol::V2 => self.reset_with(&[ gix_packetline::PacketLineRef::Delimiter, gix_packetline::PacketLineRef::Flush, diff --git a/gix-transport/src/client/blocking_io/bufread_ext.rs b/gix-transport/src/client/blocking_io/bufread_ext.rs index 3513a7fd5f..c84d26ecc6 100644 --- a/gix-transport/src/client/blocking_io/bufread_ext.rs +++ b/gix-transport/src/client/blocking_io/bufread_ext.rs @@ -113,7 +113,7 @@ impl<'a, T: io::Read> ExtendedBufRead for gix_packetline::read::WithSidebands<'a } fn reset(&mut self, version: Protocol) { match version { - Protocol::V1 => self.reset_with(&[gix_packetline::PacketLineRef::Flush]), + Protocol::V0 | Protocol::V1 => self.reset_with(&[gix_packetline::PacketLineRef::Flush]), Protocol::V2 => self.reset_with(&[ gix_packetline::PacketLineRef::Delimiter, gix_packetline::PacketLineRef::Flush, diff --git a/gix-transport/src/client/capabilities.rs b/gix-transport/src/client/capabilities.rs index 05185edaec..29b5504bae 100644 --- a/gix-transport/src/client/capabilities.rs +++ b/gix-transport/src/client/capabilities.rs @@ -23,13 +23,29 @@ pub enum Error { } /// A structure to represent multiple [capabilities][Capability] or features supported by the server. -#[derive(Debug, Clone, Default)] +/// +/// ### Deviation +/// +/// As a *shortcoming*, we are unable to parse `V1` as emitted from `git-upload-pack` without a `git-daemon` or server, +/// as it will not emit any capabilities for some reason. Only `V2` and `V0` work in that context. +#[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Capabilities { data: BString, value_sep: u8, } +/// This implementation yields exactly those minimal capabilities that are required for `gix` to work, nothing more and nothing less. 
+/// +/// This is a bit of a hack just get tests with Protocol V0 to work, which is a good way to enforce stateful transports. +/// Of course, V1 would also do that but when calling `git-upload-pack` directly, it advertises so badly that this is easier to implement. +impl Default for Capabilities { + fn default() -> Self { + Capabilities::from_lines("version 2\nmulti_ack_detailed\nside-band-64k\n".into()) + .expect("valid format, known at compile time") + } +} + /// The name of a single capability. pub struct Capability<'a>(&'a BStr); @@ -185,44 +201,50 @@ pub mod recv { // format looks like, thus there is no binary blob that could ever look like an ERR line by accident. rd.fail_on_err_lines(true); - let line = rd - .peek_line() - .ok_or(client::Error::ExpectedLine("capabilities or version"))???; - let line = line.as_text().ok_or(client::Error::ExpectedLine("text"))?; - - let version = Capabilities::extract_protocol(line)?; - match version { - Protocol::V1 => { - let (capabilities, delimiter_position) = Capabilities::from_bytes(line.0)?; - rd.peek_buffer_replace_and_truncate(delimiter_position, b'\n'); - Ok(Outcome { - capabilities, - refs: Some(Box::new(rd.as_read())), - protocol: Protocol::V1, - }) - } - Protocol::V2 => Ok(Outcome { - capabilities: { - let mut rd = rd.as_read(); - let mut buf = Vec::new(); - while let Some(line) = rd.read_data_line() { - let line = line??; - match line.as_bstr() { - Some(line) => { - buf.push_str(line); - if buf.last() != Some(&b'\n') { - buf.push(b'\n'); - } - } - None => break, + Ok(match rd.peek_line() { + Some(line) => { + let line = line??.as_text().ok_or(client::Error::ExpectedLine("text"))?; + let version = Capabilities::extract_protocol(line)?; + match version { + Protocol::V0 => unreachable!("already handled in `None` case"), + Protocol::V1 => { + let (capabilities, delimiter_position) = Capabilities::from_bytes(line.0)?; + rd.peek_buffer_replace_and_truncate(delimiter_position, b'\n'); + Outcome { + capabilities, + refs: Some(Box::new(rd.as_read())), + protocol: Protocol::V1, } } - Capabilities::from_lines(buf.into())? - }, - refs: None, - protocol: Protocol::V2, - }), - } + Protocol::V2 => Outcome { + capabilities: { + let mut rd = rd.as_read(); + let mut buf = Vec::new(); + while let Some(line) = rd.read_data_line() { + let line = line??; + match line.as_bstr() { + Some(line) => { + buf.push_str(line); + if buf.last() != Some(&b'\n') { + buf.push(b'\n'); + } + } + None => break, + } + } + Capabilities::from_lines(buf.into())? + }, + refs: None, + protocol: Protocol::V2, + }, + } + } + None => Outcome { + capabilities: Capabilities::default(), + refs: Some(Box::new(rd.as_read())), + protocol: Protocol::V0, + }, + }) } } } @@ -263,45 +285,50 @@ pub mod recv { // format looks like, thus there is no binary blob that could ever look like an ERR line by accident. 
rd.fail_on_err_lines(true); - let line = rd - .peek_line() - .await - .ok_or(client::Error::ExpectedLine("capabilities or version"))???; - let line = line.as_text().ok_or(client::Error::ExpectedLine("text"))?; - - let version = Capabilities::extract_protocol(line)?; - match version { - Protocol::V1 => { - let (capabilities, delimiter_position) = Capabilities::from_bytes(line.0)?; - rd.peek_buffer_replace_and_truncate(delimiter_position, b'\n'); - Ok(Outcome { - capabilities, - refs: Some(Box::new(rd.as_read())), - protocol: Protocol::V1, - }) - } - Protocol::V2 => Ok(Outcome { - capabilities: { - let mut rd = rd.as_read(); - let mut buf = Vec::new(); - while let Some(line) = rd.read_data_line().await { - let line = line??; - match line.as_bstr() { - Some(line) => { - buf.push_str(line); - if buf.last() != Some(&b'\n') { - buf.push(b'\n'); - } - } - None => break, + Ok(match rd.peek_line().await { + Some(line) => { + let line = line??.as_text().ok_or(client::Error::ExpectedLine("text"))?; + let version = Capabilities::extract_protocol(line)?; + match version { + Protocol::V0 => unreachable!("already handled in `None` case"), + Protocol::V1 => { + let (capabilities, delimiter_position) = Capabilities::from_bytes(line.0)?; + rd.peek_buffer_replace_and_truncate(delimiter_position, b'\n'); + Outcome { + capabilities, + refs: Some(Box::new(rd.as_read())), + protocol: Protocol::V1, } } - Capabilities::from_lines(buf.into())? - }, - refs: None, - protocol: Protocol::V2, - }), - } + Protocol::V2 => Outcome { + capabilities: { + let mut rd = rd.as_read(); + let mut buf = Vec::new(); + while let Some(line) = rd.read_data_line().await { + let line = line??; + match line.as_bstr() { + Some(line) => { + buf.push_str(line); + if buf.last() != Some(&b'\n') { + buf.push(b'\n'); + } + } + None => break, + } + } + Capabilities::from_lines(buf.into())? + }, + refs: None, + protocol: Protocol::V2, + }, + } + } + None => Outcome { + capabilities: Capabilities::default(), + refs: Some(Box::new(rd.as_read())), + protocol: Protocol::V0, + }, + }) } } } diff --git a/gix-transport/src/lib.rs b/gix-transport/src/lib.rs index 96e03a0e83..a098e635a1 100644 --- a/gix-transport/src/lib.rs +++ b/gix-transport/src/lib.rs @@ -23,6 +23,8 @@ pub use gix_packetline as packetline; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[allow(missing_docs)] pub enum Protocol { + /// Version 0 is like V1, but doesn't show capabilities at all, at least when hosted without `git-daemon`. + V0 = 0, /// Version 1 was the first one conceived, is stateful, and our implementation was seen to cause deadlocks. Prefer V2 V1 = 1, /// A command-based and stateless protocol with clear semantics, and the one to use assuming the server isn't very old. 
diff --git a/gix-transport/tests/client/capabilities.rs b/gix-transport/tests/client/capabilities.rs index 8cb20f6953..da37ac1296 100644 --- a/gix-transport/tests/client/capabilities.rs +++ b/gix-transport/tests/client/capabilities.rs @@ -49,3 +49,18 @@ fn from_bytes() -> crate::Result { ); Ok(()) } + +#[maybe_async::test(feature = "blocking-client", async(feature = "async-client", async_std::test))] +async fn from_lines_with_version_detection_v0() -> crate::Result { + let mut buf = Vec::::new(); + gix_packetline::encode::flush_to_write(&mut buf).await?; + let mut stream = + gix_packetline::StreamingPeekableIter::new(buf.as_slice(), &[gix_packetline::PacketLineRef::Flush]); + let caps = Capabilities::from_lines_with_version_detection(&mut stream) + .await + .expect("we can parse V0 as very special case, useful for testing stateful connections in other crates") + .capabilities; + assert!(caps.contains("multi_ack_detailed")); + assert!(caps.contains("side-band-64k")); + Ok(()) +} diff --git a/gix-transport/tests/client/mod.rs b/gix-transport/tests/client/mod.rs index 15ff276ead..b956864a11 100644 --- a/gix-transport/tests/client/mod.rs +++ b/gix-transport/tests/client/mod.rs @@ -1,5 +1,4 @@ #[cfg(feature = "blocking-client")] mod blocking_io; -#[cfg(not(feature = "http-client-curl"))] mod capabilities; mod git; From 60eaceb5cd4b9206e9262cf05d9036c31a7b7d11 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Jun 2023 21:58:47 +0200 Subject: [PATCH 10/16] adapt to changes in `gix-transport` --- gix-protocol/src/command/mod.rs | 6 +++--- gix-protocol/src/fetch/arguments/async_io.rs | 2 +- gix-protocol/src/fetch/arguments/blocking_io.rs | 2 +- gix-protocol/src/fetch/arguments/mod.rs | 2 +- gix-protocol/src/fetch/response/async_io.rs | 2 +- gix-protocol/src/fetch/response/blocking_io.rs | 2 +- gix-protocol/src/fetch/response/mod.rs | 2 +- gix-protocol/src/handshake/function.rs | 10 ++++++---- 8 files changed, 15 insertions(+), 13 deletions(-) diff --git a/gix-protocol/src/command/mod.rs b/gix-protocol/src/command/mod.rs index d560220d01..1216ba6250 100644 --- a/gix-protocol/src/command/mod.rs +++ b/gix-protocol/src/command/mod.rs @@ -60,7 +60,7 @@ mod with_io { match self { Command::LsRefs => &[], Command::Fetch => match version { - gix_transport::Protocol::V1 => &[ + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => &[ "multi_ack", "thin-pack", "side-band", @@ -120,7 +120,7 @@ mod with_io { ) -> Vec { match self { Command::Fetch => match version { - gix_transport::Protocol::V1 => { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { let has_multi_ack_detailed = server_capabilities.contains("multi_ack_detailed"); let has_sideband_64k = server_capabilities.contains("side-band-64k"); self.all_features(version) @@ -173,7 +173,7 @@ mod with_io { panic!("{}: argument {} is not known or allowed", self.as_str(), arg); } match version { - gix_transport::Protocol::V1 => { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { for (feature, _) in features { if server .iter() diff --git a/gix-protocol/src/fetch/arguments/async_io.rs b/gix-protocol/src/fetch/arguments/async_io.rs index 3984ec6104..fc876d02cf 100644 --- a/gix-protocol/src/fetch/arguments/async_io.rs +++ b/gix-protocol/src/fetch/arguments/async_io.rs @@ -14,7 +14,7 @@ impl Arguments { assert!(add_done_argument, "If there are no haves, is_done must be true."); } match self.version { - gix_transport::Protocol::V1 => { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { let (on_into_read, 
retained_state) = self.prepare_v1( transport.connection_persists_across_multiple_requests(), add_done_argument, diff --git a/gix-protocol/src/fetch/arguments/blocking_io.rs b/gix-protocol/src/fetch/arguments/blocking_io.rs index b49d1a1ba3..571792148f 100644 --- a/gix-protocol/src/fetch/arguments/blocking_io.rs +++ b/gix-protocol/src/fetch/arguments/blocking_io.rs @@ -15,7 +15,7 @@ impl Arguments { assert!(add_done_argument, "If there are no haves, is_done must be true."); } match self.version { - gix_transport::Protocol::V1 => { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { let (on_into_read, retained_state) = self.prepare_v1( transport.connection_persists_across_multiple_requests(), add_done_argument, diff --git a/gix-protocol/src/fetch/arguments/mod.rs b/gix-protocol/src/fetch/arguments/mod.rs index 9b24e9b9c0..b69778a022 100644 --- a/gix-protocol/src/fetch/arguments/mod.rs +++ b/gix-protocol/src/fetch/arguments/mod.rs @@ -173,7 +173,7 @@ impl Arguments { let mut deepen_relative = shallow; let supports_include_tag; let (initial_arguments, features_for_first_want) = match version { - gix_transport::Protocol::V1 => { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { deepen_since = has("deepen-since"); deepen_not = has("deepen-not"); deepen_relative = has("deepen-relative"); diff --git a/gix-protocol/src/fetch/response/async_io.rs b/gix-protocol/src/fetch/response/async_io.rs index 550ed46b66..313a9e3575 100644 --- a/gix-protocol/src/fetch/response/async_io.rs +++ b/gix-protocol/src/fetch/response/async_io.rs @@ -37,7 +37,7 @@ impl Response { reader: &mut (impl client::ExtendedBufRead + Unpin), ) -> Result { match version { - Protocol::V1 => { + Protocol::V0 | Protocol::V1 => { let mut line = String::new(); let mut acks = Vec::::new(); let mut shallows = Vec::::new(); diff --git a/gix-protocol/src/fetch/response/blocking_io.rs b/gix-protocol/src/fetch/response/blocking_io.rs index 7a3f2deb3b..29893ffb23 100644 --- a/gix-protocol/src/fetch/response/blocking_io.rs +++ b/gix-protocol/src/fetch/response/blocking_io.rs @@ -37,7 +37,7 @@ impl Response { reader: &mut impl client::ExtendedBufRead, ) -> Result { match version { - Protocol::V1 => { + Protocol::V0 | Protocol::V1 => { let mut line = String::new(); let mut acks = Vec::::new(); let mut shallows = Vec::::new(); diff --git a/gix-protocol/src/fetch/response/mod.rs b/gix-protocol/src/fetch/response/mod.rs index bfb4beb830..5f2f7f007f 100644 --- a/gix-protocol/src/fetch/response/mod.rs +++ b/gix-protocol/src/fetch/response/mod.rs @@ -170,7 +170,7 @@ impl Response { /// make it easy to maintain all versions with a single code base that aims to be and remain maintainable. 
pub fn check_required_features(version: Protocol, features: &[Feature]) -> Result<(), Error> { match version { - Protocol::V1 => { + Protocol::V0 | Protocol::V1 => { let has = |name: &str| features.iter().any(|f| f.0 == name); // Let's focus on V2 standards, and simply not support old servers to keep our code simpler if !has("multi_ack_detailed") { diff --git a/gix-protocol/src/handshake/function.rs b/gix-protocol/src/handshake/function.rs index 1206ee3630..6324fb3e10 100644 --- a/gix-protocol/src/handshake/function.rs +++ b/gix-protocol/src/handshake/function.rs @@ -77,10 +77,12 @@ where let parsed_refs = match refs { Some(mut refs) => { - assert_eq!( - actual_protocol, - gix_transport::Protocol::V1, - "Only V1 auto-responds with refs" + assert!( + matches!( + actual_protocol, + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 + ), + "Only V(0|1) auto-responds with refs" ); Some( refs::from_v1_refs_received_as_part_of_handshake_and_capabilities(&mut refs, capabilities.iter()) From 877aa2921d8491d301945f86d5a69382e40fb081 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jun 2023 19:21:47 +0200 Subject: [PATCH 11/16] feat: Add `fetch::Arguments::is_stateless()` to aid proper use of arguments. When arguments are used, haves are reset every round in stateless protocols, while everything else is repeated. However, this also means that previously confirmed common commits aren't repeated unless this is specifically implemented by the user of `Arguments`. That caller can now easily determine if negotiations have to be compensated for. Please note that `Arguments` explicitly doesn't implement repeating of all prior arguments, which would also repeat a lot of *in-vain* haves. --- gix-protocol/src/fetch/arguments/mod.rs | 12 ++++++++++++ gix-protocol/src/fetch/tests.rs | 2 ++ gix-protocol/tests/fetch/response.rs | 8 ++++++++ 3 files changed, 22 insertions(+) diff --git a/gix-protocol/src/fetch/arguments/mod.rs b/gix-protocol/src/fetch/arguments/mod.rs index b69778a022..a88ed87ba1 100644 --- a/gix-protocol/src/fetch/arguments/mod.rs +++ b/gix-protocol/src/fetch/arguments/mod.rs @@ -78,6 +78,18 @@ impl Arguments { pub fn can_use_include_tag(&self) -> bool { self.supports_include_tag } + /// Return true if we will use a stateless mode of operation, which can be decided in conjunction with `transport_is_stateless`. + /// + /// * we are always stateless if the transport is stateless, i.e. doesn't support multiple interactions with a single connection. + /// * we are always stateless if the protocol version is `2` + /// * otherwise we may be stateful. + pub fn is_stateless(&self, transport_is_stateless: bool) -> bool { + #[cfg(any(feature = "async-client", feature = "blocking-client"))] + let res = transport_is_stateless || self.version == gix_transport::Protocol::V2; + #[cfg(not(any(feature = "async-client", feature = "blocking-client")))] + let res = transport_is_stateless; + res + } /// Add the given `id` pointing to a commit to the 'want' list. 
/// diff --git a/gix-protocol/src/fetch/tests.rs b/gix-protocol/src/fetch/tests.rs index 599e5df47a..cda46b283e 100644 --- a/gix-protocol/src/fetch/tests.rs +++ b/gix-protocol/src/fetch/tests.rs @@ -298,6 +298,8 @@ mod arguments { let mut out = Vec::new(); let mut t = transport(&mut out, true); let mut arguments = arguments_v2(["feature-a", "shallow"].iter().copied()); + assert!(arguments.is_stateless(true), "V2 is stateless…"); + assert!(arguments.is_stateless(false), "…in all cases"); arguments.deepen(1); arguments.deepen_relative(); diff --git a/gix-protocol/tests/fetch/response.rs b/gix-protocol/tests/fetch/response.rs index d6aedee3b3..4d1c3f8fbb 100644 --- a/gix-protocol/tests/fetch/response.rs +++ b/gix-protocol/tests/fetch/response.rs @@ -149,6 +149,14 @@ mod v1 { async fn all() -> crate::Result { let (caps, _) = Capabilities::from_bytes(&b"7814e8a05a59c0cf5fb186661d1551c75d1299b5 HEAD\0multi_ack thin-pack filter side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative no-progress include-tag multi_ack_detailed symref=HEAD:refs/heads/master object-format=sha1 agent=git/2.28.0"[..])?; let mut args = fetch::Arguments::new(Protocol::V1, Command::Fetch.default_features(Protocol::V1, &caps)); + assert!( + args.is_stateless(true /* transport is stateless */), + "V1 is stateless if the transport is connection oriented" + ); + assert!( + !args.is_stateless(false /* transport is stateless */), + "otherwise V1 is stateful" + ); assert!(args.can_use_shallow()); assert!(args.can_use_deepen()); assert!(args.can_use_deepen_not()); From c5dc7b4c43f07c04cdfb218de03e6725ff3fdb64 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jun 2023 14:27:26 +0200 Subject: [PATCH 12/16] fix: `include-tag` is now properly handled in V1 fetch arguments Previously it would be added like it's V2 arguments, which makes using it in V1 impossible. 
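
For illustration, a minimal sketch of the call order this change implies for V1; the helper name, `caps` (handshake capabilities), and `want_id` are assumptions that mirror the tests touched below:

    use gix_protocol::{fetch, transport::client::Capabilities, transport::Protocol, Command};

    // Build V1 fetch arguments from the server's advertised capabilities and
    // request `include-tag` before the first `want()`, since in V0/V1 it now
    // travels in the feature list of that first want line (see the
    // `expect("call use_include_tag before want()")` in the change below).
    fn v1_wants_with_tags(caps: &Capabilities, want_id: gix_hash::ObjectId) -> fetch::Arguments {
        let mut args = fetch::Arguments::new(Protocol::V1, Command::Fetch.default_features(Protocol::V1, caps));
        if args.can_use_include_tag() {
            args.use_include_tag();
        }
        args.want(want_id);
        args
    }
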
--- gix-protocol/src/fetch/arguments/mod.rs | 16 +++++++++++++++- gix-protocol/src/fetch/tests.rs | 22 ++++++++++++++++++++-- gix-protocol/tests/fetch/response.rs | 2 +- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/gix-protocol/src/fetch/arguments/mod.rs b/gix-protocol/src/fetch/arguments/mod.rs index a88ed87ba1..1adf993bb0 100644 --- a/gix-protocol/src/fetch/arguments/mod.rs +++ b/gix-protocol/src/fetch/arguments/mod.rs @@ -165,7 +165,18 @@ impl Arguments { pub fn use_include_tag(&mut self) { debug_assert!(self.supports_include_tag, "'include-tag' feature required"); if self.supports_include_tag { - self.args.push("include-tag".into()); + match self.version { + gix_transport::Protocol::V0 | gix_transport::Protocol::V1 => { + let features = self + .features_for_first_want + .as_mut() + .expect("call use_include_tag before want()"); + features.push("include-tag".into()) + } + gix_transport::Protocol::V2 => { + self.args.push("include-tag".into()); + } + } } } fn prefixed(&mut self, prefix: &str, value: impl fmt::Display) { @@ -192,6 +203,9 @@ impl Arguments { supports_include_tag = has("include-tag"); let baked_features = features .iter() + .filter( + |(f, _)| *f != "include-tag", /* not a capability in that sense, needs to be turned on by caller later */ + ) .map(|(n, v)| match v { Some(v) => format!("{n}={v}"), None => n.to_string(), diff --git a/gix-protocol/src/fetch/tests.rs b/gix-protocol/src/fetch/tests.rs index cda46b283e..80dc752dd2 100644 --- a/gix-protocol/src/fetch/tests.rs +++ b/gix-protocol/src/fetch/tests.rs @@ -171,14 +171,32 @@ mod arguments { arguments.send(&mut t, true).await.expect("sending to buffer to work"); assert_eq!( out.as_bstr(), - b"0048want ff333369de1221f9bfbbe03a3a13e9a09bc1ffff include-tag feature-b -0010include-tag + b"0048want ff333369de1221f9bfbbe03a3a13e9a09bc1ffff feature-b include-tag 00000009done " .as_bstr() ); } + #[maybe_async::test(feature = "blocking-client", async(feature = "async-client", async_std::test))] + async fn no_include_tag() { + let mut out = Vec::new(); + let mut t = transport(&mut out, true); + let mut arguments = arguments_v1(["include-tag", "feature-b"].iter().copied()); + assert!(arguments.can_use_include_tag()); + + arguments.want(id("ff333369de1221f9bfbbe03a3a13e9a09bc1ffff")); + arguments.send(&mut t, true).await.expect("sending to buffer to work"); + assert_eq!( + out.as_bstr(), + b"003cwant ff333369de1221f9bfbbe03a3a13e9a09bc1ffff feature-b +00000009done +" + .as_bstr(), + "it's possible to not have it enabled, even though it's advertised by the server" + ); + } + #[maybe_async::test(feature = "blocking-client", async(feature = "async-client", async_std::test))] async fn haves_and_wants_for_clone() { let mut out = Vec::new(); diff --git a/gix-protocol/tests/fetch/response.rs b/gix-protocol/tests/fetch/response.rs index 4d1c3f8fbb..85d42d4843 100644 --- a/gix-protocol/tests/fetch/response.rs +++ b/gix-protocol/tests/fetch/response.rs @@ -188,7 +188,7 @@ mod v1 { let _response = args.send(&mut transport, true).await?; drop(_response); - assert_eq!(out.as_slice().as_bstr(), "00aawant aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa thin-pack side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative include-tag multi_ack_detailed filter\n000ddeepen 1\n0014deepen-relative\n0018deepen-since 123456\n0013deepen-not tag\n0035shallow 97c5a932b3940a09683e924ef6a92b31a6f7c6de\n00000032have bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n0009done\n"); + assert_eq!(out.as_slice().as_bstr(), "009ewant 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa thin-pack side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative multi_ack_detailed filter\n000ddeepen 1\n0014deepen-relative\n0018deepen-since 123456\n0013deepen-not tag\n0035shallow 97c5a932b3940a09683e924ef6a92b31a6f7c6de\n00000032have bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n0009done\n"); Ok(()) } } From 6a3c02131c9ebca911be5f751e5a6c67fbdbf609 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jun 2023 15:37:41 +0200 Subject: [PATCH 13/16] fix!: make V1 stateless negotations work. This is done by working around another V1 negotiation oddity by exploiting client-side knowledge. For very old servers, we probably wouldn't be able to do multi-rounds without dead-locking, but with recent-enough (probably 10 years or so) old git servers all should work fine. All this to not actually have to implement the V1 strangeness, allowing our code to work smoothly with all permutations of stateless/stateful connections and V1/V2 interactions, with a single high-level implementation essentially. --- gix-protocol/src/fetch/response/async_io.rs | 13 ++++++-- .../src/fetch/response/blocking_io.rs | 13 ++++++-- gix-protocol/src/fetch_fn.rs | 7 ++++- gix-protocol/tests/fetch/response.rs | 30 ++++++++++--------- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/gix-protocol/src/fetch/response/async_io.rs b/gix-protocol/src/fetch/response/async_io.rs index 313a9e3575..f37307619e 100644 --- a/gix-protocol/src/fetch/response/async_io.rs +++ b/gix-protocol/src/fetch/response/async_io.rs @@ -32,9 +32,15 @@ async fn parse_v2_section( impl Response { /// Parse a response of the given `version` of the protocol from `reader`. + /// + /// `client_expects_pack` is only relevant for V1 stateful connections, and if `false`, causes us to stop parsing when seeing `NAK`, + /// and if `true` we will keep parsing until we get a pack as the client already signalled to the server that it's done. + /// This way of doing things allows us to exploit knowledge about more recent versions of the protocol, which keeps code easier + /// and more localized without having to support all the cruft that there is. pub async fn from_line_reader( version: Protocol, reader: &mut (impl client::ExtendedBufRead + Unpin), + client_expects_pack: bool, ) -> Result { match version { Protocol::V0 | Protocol::V1 => { @@ -48,8 +54,8 @@ impl Response { // This special case (hang/block forever) deals with a single NAK being a legitimate EOF sometimes // Note that this might block forever in stateful connections as there it's not really clear // if something will be following or not by just looking at the response. Instead you have to know - // the arguments sent to the server and count response lines based on intricate knowledge on how the - // server works. + // [a lot](https://github.com/git/git/blob/9e49351c3060e1fa6e0d2de64505b7becf157f28/fetch-pack.c#L583-L594) + // to deal with this correctly. // For now this is acceptable, as V2 can be used as a workaround, which also is the default. 
Some(Err(err)) if err.kind() == io::ErrorKind::UnexpectedEof => break 'lines false, Some(Err(err)) => return Err(err.into()), @@ -75,6 +81,9 @@ impl Response { if Response::parse_v1_ack_or_shallow_or_assume_pack(&mut acks, &mut shallows, &peeked_line) { break 'lines true; } + if let Some(Acknowledgement::Nak) = acks.last().filter(|_| !client_expects_pack) { + break 'lines false; + } assert_ne!( reader.readline_str(&mut line).await?, 0, diff --git a/gix-protocol/src/fetch/response/blocking_io.rs b/gix-protocol/src/fetch/response/blocking_io.rs index 29893ffb23..35bf49f305 100644 --- a/gix-protocol/src/fetch/response/blocking_io.rs +++ b/gix-protocol/src/fetch/response/blocking_io.rs @@ -32,9 +32,15 @@ fn parse_v2_section( impl Response { /// Parse a response of the given `version` of the protocol from `reader`. + /// + /// `client_expects_pack` is only relevant for V1 stateful connections, and if `false`, causes us to stop parsing when seeing `NAK`, + /// and if `true` we will keep parsing until we get a pack as the client already signalled to the server that it's done. + /// This way of doing things allows us to exploit knowledge about more recent versions of the protocol, which keeps code easier + /// and more localized without having to support all the cruft that there is. pub fn from_line_reader( version: Protocol, reader: &mut impl client::ExtendedBufRead, + client_expects_pack: bool, ) -> Result { match version { Protocol::V0 | Protocol::V1 => { @@ -48,8 +54,8 @@ impl Response { // This special case (hang/block forever) deals with a single NAK being a legitimate EOF sometimes // Note that this might block forever in stateful connections as there it's not really clear // if something will be following or not by just looking at the response. Instead you have to know - // the arguments sent to the server and count response lines based on intricate knowledge on how the - // server works. + // [a lot](https://github.com/git/git/blob/9e49351c3060e1fa6e0d2de64505b7becf157f28/fetch-pack.c#L583-L594) + // to deal with this correctly. // For now this is acceptable, as V2 can be used as a workaround, which also is the default. 
Some(Err(err)) if err.kind() == io::ErrorKind::UnexpectedEof => break 'lines false, Some(Err(err)) => return Err(err.into()), @@ -75,6 +81,9 @@ impl Response { if Response::parse_v1_ack_or_shallow_or_assume_pack(&mut acks, &mut shallows, &peeked_line) { break 'lines true; } + if let Some(Acknowledgement::Nak) = acks.last().filter(|_| !client_expects_pack) { + break 'lines false; + } assert_ne!(reader.readline_str(&mut line)?, 0, "consuming a peeked line works"); }; Ok(Response { diff --git a/gix-protocol/src/fetch_fn.rs b/gix-protocol/src/fetch_fn.rs index 5899ed95fc..64d457711b 100644 --- a/gix-protocol/src/fetch_fn.rs +++ b/gix-protocol/src/fetch_fn.rs @@ -128,7 +128,12 @@ where if sideband_all { setup_remote_progress(&mut progress, &mut reader); } - let response = Response::from_line_reader(protocol_version, &mut reader).await?; + let response = Response::from_line_reader( + protocol_version, + &mut reader, + true, /* hack, telling us we don't want this delegate approach anymore */ + ) + .await?; previous_response = if response.has_pack() { progress.step(); progress.set_name("receiving pack"); diff --git a/gix-protocol/tests/fetch/response.rs b/gix-protocol/tests/fetch/response.rs index 85d42d4843..8d91a6786c 100644 --- a/gix-protocol/tests/fetch/response.rs +++ b/gix-protocol/tests/fetch/response.rs @@ -29,7 +29,7 @@ mod v1 { async fn clone() -> crate::Result { let mut provider = mock_reader("v1/clone-only.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader, true).await?; assert_eq!(r.acknowledgements(), &[Acknowledgement::Nak]); assert!(r.has_pack()); let mut buf = Vec::new(); @@ -42,7 +42,7 @@ mod v1 { async fn shallow_clone() -> crate::Result { let mut provider = mock_reader("v1/clone-deepen-1.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader, true).await?; assert_eq!( r.shallow_updates(), &[ShallowUpdate::Shallow(id("808e50d724f604f69ab93c6da2919c014667bedb"))] @@ -59,7 +59,7 @@ mod v1 { async fn empty_shallow_clone_due_to_depth_being_too_high() -> crate::Result { let mut provider = mock_reader("v1/clone-deepen-5.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader, true).await?; assert!(r.shallow_updates().is_empty()); assert_eq!(r.acknowledgements(), &[Acknowledgement::Nak]); assert!(r.has_pack()); @@ -73,7 +73,7 @@ mod v1 { async fn unshallow_fetch() -> crate::Result { let mut provider = mock_reader("v1/fetch-unshallow.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader, true).await?; assert_eq!( r.acknowledgements(), &[ @@ -103,7 +103,8 @@ mod v1 { #[maybe_async::test(feature = "blocking-client", async(feature = "async-client", async_std::test))] async fn fetch_acks_without_pack() -> crate::Result { let mut provider = mock_reader("v1/fetch-no-pack.response"); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut provider.as_read_without_sidebands()).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut 
provider.as_read_without_sidebands(), true) + .await?; assert_eq!( r.acknowledgements(), &[ @@ -119,7 +120,7 @@ mod v1 { async fn fetch_acks_and_pack() -> crate::Result { let mut provider = mock_reader("v1/fetch.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V1, &mut reader, true).await?; assert_eq!( r.acknowledgements(), &[ @@ -218,7 +219,7 @@ mod v2 { ); let mut provider = mock_reader(&fixture); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert!(r.acknowledgements().is_empty(), "it should go straight to the packfile"); assert!(r.has_pack()); reader.set_progress_handler(Some(Box::new(|_is_err, _text| { @@ -235,7 +236,7 @@ mod v2 { async fn shallow_clone() -> crate::Result { let mut provider = mock_reader("v2/clone-deepen-1.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert!(r.acknowledgements().is_empty(), "it should go straight to the packfile"); assert_eq!( r.shallow_updates(), @@ -252,7 +253,7 @@ mod v2 { async fn unshallow_fetch() -> crate::Result { let mut provider = mock_reader("v2/fetch-unshallow.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert_eq!( r.acknowledgements(), &[ @@ -284,7 +285,7 @@ mod v2 { async fn empty_shallow_clone() -> crate::Result { let mut provider = mock_reader("v2/clone-deepen-5.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert!(r.acknowledgements().is_empty(), "it should go straight to the packfile"); assert!(r.shallow_updates().is_empty(), "it should go straight to the packfile"); assert!(r.has_pack()); @@ -298,7 +299,7 @@ mod v2 { async fn clone_with_sidebands() -> crate::Result { let mut provider = mock_reader("v2/clone-only-2.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert!(r.acknowledgements().is_empty(), "it should go straight to the packfile"); assert!(r.has_pack()); @@ -320,7 +321,8 @@ mod v2 { #[maybe_async::test(feature = "blocking-client", async(feature = "async-client", async_std::test))] async fn fetch_acks_without_pack() -> crate::Result { let mut provider = mock_reader("v2/fetch-no-pack.response"); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut provider.as_read_without_sidebands()).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut provider.as_read_without_sidebands(), true) + .await?; assert_eq!(r.acknowledgements(), &[Acknowledgement::Nak]); Ok(()) } @@ -330,7 +332,7 @@ mod v2 { let mut provider = mock_reader("v2/fetch-err-line.response"); provider.fail_on_err_lines(true); let mut sidebands = provider.as_read_without_sidebands(); - match 
fetch::Response::from_line_reader(Protocol::V2, &mut sidebands).await { + match fetch::Response::from_line_reader(Protocol::V2, &mut sidebands, true).await { Ok(_) => panic!("need error response"), Err(err) => match err { fetch::response::Error::UploadPack(err) => { @@ -345,7 +347,7 @@ mod v2 { async fn fetch_acks_and_pack() -> crate::Result { let mut provider = mock_reader("v2/fetch.response"); let mut reader = provider.as_read_without_sidebands(); - let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader).await?; + let r = fetch::Response::from_line_reader(Protocol::V2, &mut reader, true).await?; assert_eq!( r.acknowledgements(), &[ From af0ef2f36736e3805f769d8cd59c9fa7eb6a22a0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jun 2023 16:29:06 +0200 Subject: [PATCH 14/16] feat: use `gix-negotiate` in fetch machinery. Thanks to it we are finally able to do pack negotiations just like git can, as many rounds as it takes and with all available algorithms. Works for V1 and V2 and for stateless and stateful transports. --- Cargo.lock | 1 + crate-status.md | 8 +- gix/Cargo.toml | 1 + gix/src/clone/fetch/mod.rs | 30 +- gix/src/clone/mod.rs | 2 +- gix/src/config/tree/keys.rs | 3 +- gix/src/config/tree/sections/protocol.rs | 54 ++- gix/src/lib.rs | 1 + gix/src/remote/connect.rs | 26 +- gix/src/remote/connection/fetch/mod.rs | 6 + gix/src/remote/connection/fetch/negotiate.rs | 384 +++++++++++++++--- .../remote/connection/fetch/receive_pack.rs | 279 +++++++------ gix/src/remote/fetch.rs | 31 +- gix/src/repository/graph.rs | 24 ++ gix/src/repository/mod.rs | 1 + gix/tests/clone/mod.rs | 86 ++-- gix/tests/config/tree.rs | 78 +++- gix/tests/fixtures/make_remote_repos.sh | 42 ++ gix/tests/remote/fetch.rs | 117 +++++- 19 files changed, 882 insertions(+), 292 deletions(-) create mode 100644 gix/src/repository/graph.rs diff --git a/Cargo.lock b/Cargo.lock index 8ff9f7bd1e..eadf0e9ee9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1301,6 +1301,7 @@ dependencies = [ "gix-index 0.16.1", "gix-lock 5.0.1", "gix-mailmap", + "gix-negotiate", "gix-object 0.29.2", "gix-odb", "gix-pack", diff --git a/crate-status.md b/crate-status.md index f7ddf912ce..51d134d46c 100644 --- a/crate-status.md +++ b/crate-status.md @@ -233,6 +233,7 @@ Check out the [performance discussion][gix-traverse-performance] as well. * [x] nested traversal * **commits** * [x] ancestor graph traversal similar to `git revlog` + * [ ] `commitgraph` support * [x] API documentation * [ ] Examples @@ -670,8 +671,11 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README. * [x] fetch * [x] shallow (remains shallow, options to adjust shallow boundary) * [ ] a way to auto-explode small packs to avoid them to pile up - * [ ] 'ref-in-want' - * [ ] standard negotiation algorithms (right now we only have a 'naive' one) + * [x] 'ref-in-want' + * [ ] 'wanted-ref' + * [ ] standard negotiation algorithms `consecutive`, `skipping` and `noop`. + * [ ] a more efficient way to deal [with common `have`](https://github.com/git/git/blob/9e49351c3060e1fa6e0d2de64505b7becf157f28/fetch-pack.c#L525) + during negotiation - we would submit known non-common `haves` each round in stateless transports whereas git prunes the set to known common ones. 
* [ ] push * [x] ls-refs * [x] ls-refs with ref-spec filter diff --git a/gix/Cargo.toml b/gix/Cargo.toml index aad23ca49c..16cb8243db 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -131,6 +131,7 @@ gix-object = { version = "^0.29.2", path = "../gix-object" } gix-actor = { version = "^0.20.0", path = "../gix-actor" } gix-pack = { version = "^0.35.0", path = "../gix-pack", features = ["object-cache-dynamic"] } gix-revision = { version = "^0.14.0", path = "../gix-revision" } +gix-negotiate = { version = "0.1.0", path = "../gix-negotiate" } gix-path = { version = "^0.8.0", path = "../gix-path" } gix-url = { version = "^0.18.0", path = "../gix-url" } diff --git a/gix/src/clone/fetch/mod.rs b/gix/src/clone/fetch/mod.rs index 59f820675d..e20cc96cb4 100644 --- a/gix/src/clone/fetch/mod.rs +++ b/gix/src/clone/fetch/mod.rs @@ -44,7 +44,12 @@ impl PrepareFetch { /// it was newly initialized. /// /// Note that all data we created will be removed once this instance drops if the operation wasn't successful. - pub fn fetch_only

( + /// + /// ### Note for users of `async` + /// + /// Even though + #[gix_protocol::maybe_async::maybe_async] + pub async fn fetch_only

( &mut self, mut progress: P, should_interrupt: &std::sync::atomic::AtomicBool, @@ -101,17 +106,19 @@ impl PrepareFetch { .expect("valid") .to_owned(); let pending_pack: remote::fetch::Prepare<'_, '_, _> = { - let mut connection = remote.connect(remote::Direction::Fetch)?; + let mut connection = remote.connect(remote::Direction::Fetch).await?; if let Some(f) = self.configure_connection.as_mut() { f(&mut connection).map_err(|err| Error::RemoteConnection(err))?; } - connection.prepare_fetch(&mut progress, { - let mut opts = self.fetch_options.clone(); - if !opts.extra_refspecs.contains(&head_refspec) { - opts.extra_refspecs.push(head_refspec) - } - opts - })? + connection + .prepare_fetch(&mut progress, { + let mut opts = self.fetch_options.clone(); + if !opts.extra_refspecs.contains(&head_refspec) { + opts.extra_refspecs.push(head_refspec) + } + opts + }) + .await? }; if pending_pack.ref_map().object_hash != repo.object_hash() { unimplemented!("configure repository to expect a different object hash as advertised by the server") @@ -127,7 +134,8 @@ impl PrepareFetch { message: reflog_message.clone(), }) .with_shallow(self.shallow.clone()) - .receive(progress, should_interrupt)?; + .receive(progress, should_interrupt) + .await?; util::append_config_to_repo_config(repo, config); util::update_head( @@ -141,6 +149,7 @@ impl PrepareFetch { } /// Similar to [`fetch_only()`][Self::fetch_only()`], but passes ownership to a utility type to configure a checkout operation. + #[cfg(feature = "blocking-network-client")] pub fn fetch_then_checkout

( &mut self, progress: P, @@ -155,5 +164,4 @@ impl PrepareFetch { } } -#[cfg(feature = "blocking-network-client")] mod util; diff --git a/gix/src/clone/mod.rs b/gix/src/clone/mod.rs index 43024e0b4e..9ec2261351 100644 --- a/gix/src/clone/mod.rs +++ b/gix/src/clone/mod.rs @@ -160,7 +160,7 @@ mod access_feat { } /// -#[cfg(feature = "blocking-network-client")] +#[cfg(any(feature = "async-network-client-async-std", feature = "blocking-network-client"))] pub mod fetch; /// diff --git a/gix/src/config/tree/keys.rs b/gix/src/config/tree/keys.rs index 4d25d5ba02..b03fa49c6d 100644 --- a/gix/src/config/tree/keys.rs +++ b/gix/src/config/tree/keys.rs @@ -511,7 +511,8 @@ pub mod validate { gix_config::Integer::try_from(value)? .to_decimal() .ok_or_else(|| format!("integer {value} cannot be represented as `usize`"))?, - )?; + ) + .map_err(|_| "cannot use sign for unsigned integer")?; Ok(()) } } diff --git a/gix/src/config/tree/sections/protocol.rs b/gix/src/config/tree/sections/protocol.rs index 58e907b0f8..a0510f2b8d 100644 --- a/gix/src/config/tree/sections/protocol.rs +++ b/gix/src/config/tree/sections/protocol.rs @@ -6,6 +6,8 @@ use crate::{ impl Protocol { /// The `protocol.allow` key. pub const ALLOW: Allow = Allow::new_with_validate("allow", &config::Tree::PROTOCOL, validate::Allow); + /// The `protocol.version` key. + pub const VERSION: Version = Version::new_with_validate("version", &config::Tree::PROTOCOL, validate::Version); /// The `protocol.` subsection pub const NAME_PARAMETER: NameParameter = NameParameter; @@ -14,6 +16,9 @@ impl Protocol { /// The `protocol.allow` key type. pub type Allow = keys::Any; +/// The `protocol.version` key. +pub type Version = keys::Any; + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] mod allow { use std::borrow::Cow; @@ -39,7 +44,7 @@ mod allow { pub struct NameParameter; impl NameParameter { - /// The `credential..helper` key. + /// The `protocol..allow` key. pub const ALLOW: Allow = Allow::new_with_validate("allow", &Protocol::NAME_PARAMETER, validate::Allow); } @@ -63,7 +68,7 @@ impl Section for Protocol { } fn keys(&self) -> &[&dyn Key] { - &[&Self::ALLOW] + &[&Self::ALLOW, &Self::VERSION] } fn sub_sections(&self) -> &[&dyn Section] { @@ -71,6 +76,38 @@ impl Section for Protocol { } } +mod key_impls { + impl super::Version { + /// Convert `value` into the corresponding protocol version, possibly applying the correct default. + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] + pub fn try_into_protocol_version( + &'static self, + value: Option>, + ) -> Result { + let value = match value { + None => return Ok(gix_protocol::transport::Protocol::V2), + Some(v) => v, + }; + Ok(match value { + Ok(0) => gix_protocol::transport::Protocol::V0, + Ok(1) => gix_protocol::transport::Protocol::V1, + Ok(2) => gix_protocol::transport::Protocol::V2, + Ok(other) => { + return Err(crate::config::key::GenericErrorWithValue::from_value( + self, + other.to_string().into(), + )) + } + Err(err) => { + return Err( + crate::config::key::GenericErrorWithValue::from_value(self, "unknown".into()).with_source(err), + ) + } + }) + } + } +} + mod validate { use crate::{bstr::BStr, config::tree::keys}; @@ -82,4 +119,17 @@ mod validate { Ok(()) } } + + pub struct Version; + impl keys::Validate for Version { + fn validate(&self, value: &BStr) -> Result<(), Box> { + let value = gix_config::Integer::try_from(value)? 
+ .to_decimal() + .ok_or_else(|| format!("integer {value} cannot be represented as integer"))?; + match value { + 0 | 1 | 2 => Ok(()), + _ => Err(format!("protocol version {value} is unknown").into()), + } + } + } } diff --git a/gix/src/lib.rs b/gix/src/lib.rs index ca902eedcc..5de702dbfa 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -81,6 +81,7 @@ pub use gix_ignore as ignore; #[doc(inline)] pub use gix_index as index; pub use gix_lock as lock; +pub use gix_negotiate as negotiate; pub use gix_object as objs; pub use gix_object::bstr; pub use gix_odb as odb; diff --git a/gix/src/remote/connect.rs b/gix/src/remote/connect.rs index 3dbd934862..63475b7c5c 100644 --- a/gix/src/remote/connect.rs +++ b/gix/src/remote/connect.rs @@ -24,8 +24,8 @@ mod error { Connect(#[from] gix_protocol::transport::client::connect::Error), #[error("The {} url was missing - don't know where to establish a connection to", direction.as_str())] MissingUrl { direction: remote::Direction }, - #[error("Protocol named {given:?} is not a valid protocol. Choose between 1 and 2")] - UnknownProtocol { given: BString }, + #[error("The given protocol version was invalid. Choose between 1 and 2")] + UnknownProtocol { source: config::key::GenericErrorWithValue }, #[error("Could not verify that \"{}\" url is a valid git directory before attempting to use it", url.to_bstring())] FileUrl { source: Box, @@ -128,25 +128,9 @@ impl<'repo> Remote<'repo> { Ok(url) } - use gix_protocol::transport::Protocol; - let version = self - .repo - .config - .resolved - .integer("protocol", None, "version") - .unwrap_or(Ok(2)) - .map_err(|err| Error::UnknownProtocol { given: err.input }) - .and_then(|num| { - Ok(match num { - 1 => Protocol::V1, - 2 => Protocol::V2, - num => { - return Err(Error::UnknownProtocol { - given: num.to_string().into(), - }) - } - }) - })?; + let version = crate::config::tree::Protocol::VERSION + .try_into_protocol_version(self.repo.config.resolved.integer("protocol", None, "version")) + .map_err(|err| Error::UnknownProtocol { source: err })?; let url = self.url(direction).ok_or(Error::MissingUrl { direction })?.to_owned(); if !self.repo.config.url_scheme()?.allow(&url.scheme) { diff --git a/gix/src/remote/connection/fetch/mod.rs b/gix/src/remote/connection/fetch/mod.rs index 8b8f211543..b4fe009354 100644 --- a/gix/src/remote/connection/fetch/mod.rs +++ b/gix/src/remote/connection/fetch/mod.rs @@ -43,6 +43,8 @@ impl RefLogMessage { pub enum Status { /// Nothing changed as the remote didn't have anything new compared to our tracking branches, thus no pack was received /// and no new object was added. + /// + /// As we could determine that nothing changed without remote interaction, there was no negotiation at all. NoPackReceived { /// However, depending on the refspecs, references might have been updated nonetheless to point to objects as /// reported by the remote. @@ -50,6 +52,8 @@ pub enum Status { }, /// There was at least one tip with a new object which we received. Change { + /// The number of rounds it took to minimize the pack to contain only the objects we don't have. + negotiation_rounds: usize, /// Information collected while writing the pack and its index. write_pack_bundle: gix_pack::bundle::write::Outcome, /// Information collected while updating references. @@ -58,6 +62,8 @@ pub enum Status { /// A dry run was performed which leaves the local repository without any change /// nor will a pack have been received. 
DryRun { + /// The number of rounds it took to minimize the *would-be-sent*-pack to contain only the objects we don't have. + negotiation_rounds: usize, /// Information about what updates to refs would have been done. update_refs: refs::update::Outcome, }, diff --git a/gix/src/remote/connection/fetch/negotiate.rs b/gix/src/remote/connection/fetch/negotiate.rs index 985d7bd259..be3f8e3414 100644 --- a/gix/src/remote/connection/fetch/negotiate.rs +++ b/gix/src/remote/connection/fetch/negotiate.rs @@ -1,5 +1,11 @@ use crate::remote::fetch; -use crate::remote::fetch::negotiate::Algorithm; +use crate::remote::fetch::Shallow; +use gix_negotiate::Flags; +use gix_odb::HeaderExt; +use gix_pack::Find; +use std::borrow::Cow; + +type Queue = gix_revision::PriorityQueue; /// The error returned during negotiation. #[derive(Debug, thiserror::Error)] @@ -7,80 +13,336 @@ use crate::remote::fetch::negotiate::Algorithm; pub enum Error { #[error("We were unable to figure out what objects the server should send after {rounds} round(s)")] NegotiationFailed { rounds: usize }, + #[error(transparent)] + LookupCommitInGraph(#[from] gix_revision::graph::lookup::commit::Error), + #[error(transparent)] + InitRefsIterator(#[from] crate::reference::iter::init::Error), + #[error(transparent)] + InitRefsIteratorPlatform(#[from] crate::reference::iter::Error), + #[error(transparent)] + ObtainRefDuringIteration(#[from] Box), + #[error(transparent)] + LoadIndex(#[from] gix_odb::store::load_index::Error), } -/// Negotiate one round with `algo` by looking at `ref_map` and adjust `arguments` to contain the haves and wants. -/// If this is not the first round, the `previous_response` is set with the last recorded server response. -/// Returns `true` if the negotiation is done from our side so the server won't keep asking. -#[allow(clippy::too_many_arguments)] -pub(crate) fn one_round( - algo: Algorithm, - round: usize, +#[must_use] +pub(crate) enum Action { + /// None of the remote refs moved compared to our last recorded state (via tracking refs), so there is nothing to do at all, + /// not even a ref update. + NoChange, + /// Don't negotiate, don't fetch the pack, skip right to updating the references. + /// + /// This happens if we already have all local objects even though the server seems to have changed. + SkipToRefUpdate, + /// We can't know for sure if fetching *is not* needed, so we go ahead and negotiate. + MustNegotiate { + /// Each `ref_map.mapping` has a slot here which is `true` if we have the object the remote ref points to locally. + remote_ref_target_known: Vec, + }, +} + +/// This function is modeled after the similarly named one in the git codebase to do the following: +/// +/// * figure out all advertised refs on the remote *that we already have* and keep track of the oldest one as cutoff date. +/// * mark all of our own refs as tips for a traversal. +/// * mark all their parents, recursively, up to (and including) the cutoff date up to which we have seen the servers commit that we have. +/// * pass all known-to-be-common-with-remote commits to the negotiator as common commits. +/// +/// This is done so that we already find the most recent common commits, even if we are ahead, which is *potentially* better than +/// what we would get if we would rely on tracking refs alone, particularly if one wouldn't trust the tracking refs for some reason. 
+/// +/// Note that git doesn't trust its own tracking refs as the server *might* have changed completely, for instance by force-pushing, so +/// marking our local tracking refs as known is something that's actually not proven to be correct so it's not done. +/// +/// Additionally, it does what's done in `transport.c` and we check if a fetch is actually needed as at least one advertised ref changed. +/// +/// Finally, we also mark tips in the `negotiator` in one go to avoid traversing all refs twice, since we naturally encounter all tips during +/// our own walk. +/// +/// Return whether or not we should negotiate, along with a queue for later use. +pub(crate) fn mark_complete_and_common_ref( repo: &crate::Repository, + negotiator: &mut dyn gix_negotiate::Negotiator, + graph: &mut gix_negotiate::Graph<'_>, ref_map: &fetch::RefMap, - fetch_tags: fetch::Tags, - arguments: &mut gix_protocol::fetch::Arguments, - _previous_response: Option<&gix_protocol::fetch::Response>, - shallow: Option<&fetch::Shallow>, -) -> Result { - let tag_refspec_to_ignore = fetch_tags - .to_refspec() - .filter(|_| matches!(fetch_tags, crate::remote::fetch::Tags::Included)); - if let Some(fetch::Shallow::Deepen(0)) = shallow { + shallow: &fetch::Shallow, +) -> Result { + if let fetch::Shallow::Deepen(0) = shallow { // Avoid deepening (relative) with zero as it seems to upset the server. Git also doesn't actually // perform the negotiation for some reason (couldn't find it in code). - return Ok(true); + return Ok(Action::NoChange); } + if let Some(fetch::Mapping { + remote: fetch::Source::Ref(gix_protocol::handshake::Ref::Unborn { .. }), + .. + }) = ref_map.mappings.last().filter(|_| ref_map.mappings.len() == 1) + { + // There is only an unborn branch, as the remote has an empty repository. This means there is nothing to do except for + // possibly reproducing the unborn branch locally. + return Ok(Action::SkipToRefUpdate); + } + + // Compute the cut-off date by checking which of the refs advertised (and matched in refspecs) by the remote we have, + // and keep the oldest one. + let mut cutoff_date = None::; + let mut num_mappings_with_change = 0; + let mut remote_ref_target_known: Vec = std::iter::repeat(false).take(ref_map.mappings.len()).collect(); - match algo { - Algorithm::Noop | Algorithm::Skipping | Algorithm::Consecutive => { - todo!() + for (mapping_idx, mapping) in ref_map.mappings.iter().enumerate() { + let want_id = mapping.remote.as_id(); + let have_id = mapping.local.as_ref().and_then(|name| { + // this is the only time git uses the peer-id. 
+ let r = repo.find_reference(name).ok()?; + r.target().try_id().map(ToOwned::to_owned) + }); + + // Like git, we don't let known unchanged mappings participate in the tree traversal + if want_id.zip(have_id).map_or(true, |(want, have)| want != have) { + num_mappings_with_change += 1; } - Algorithm::Naive => { - assert_eq!(round, 1, "Naive always finishes after the first round, it claims."); - let mut has_missing_tracking_branch = false; - for mapping in &ref_map.mappings { - if tag_refspec_to_ignore.map_or(false, |tag_spec| { - mapping - .spec_index - .implicit_index() - .and_then(|idx| ref_map.extra_refspecs.get(idx)) - .map_or(false, |spec| spec.to_ref() == tag_spec) - }) { - continue; - } - let have_id = mapping.local.as_ref().and_then(|name| { - repo.find_reference(name) - .ok() - .and_then(|r| r.target().try_id().map(ToOwned::to_owned)) - }); - match have_id { - Some(have_id) => { - if let Some(want_id) = mapping.remote.as_id() { - if want_id != have_id { - arguments.want(want_id); - arguments.have(have_id); - } - } - } - None => { - if let Some(want_id) = mapping.remote.as_id() { - arguments.want(want_id); - has_missing_tracking_branch = true; - } - } - } + + if let Some(commit) = want_id + .and_then(|id| graph.try_lookup_or_insert_commit(id.into(), |_| {}).transpose()) + .transpose()? + { + remote_ref_target_known[mapping_idx] = true; + cutoff_date = cutoff_date.unwrap_or_default().max(commit.commit_time).into(); + } else if want_id.map_or(false, |maybe_annotated_tag| repo.objects.contains(maybe_annotated_tag)) { + remote_ref_target_known[mapping_idx] = true; + } + } + + // If any kind of shallowing operation is desired, the server may still create a pack for us. + if matches!(shallow, Shallow::NoChange) { + if num_mappings_with_change == 0 { + return Ok(Action::NoChange); + } else if remote_ref_target_known.iter().all(|known| *known) { + return Ok(Action::SkipToRefUpdate); + } + } + + // color our commits as complete as identified by references, unconditionally + // (`git` is conditional here based on `deepen`, but it doesn't make sense and it's hard to extract from history when that happened). + let mut queue = Queue::new(); + mark_all_refs_in_repo(repo, graph, &mut queue, Flags::COMPLETE)?; + mark_alternate_complete(repo, graph, &mut queue)?; + // Keep track of the tips, which happen to be on our queue right, before we traverse the graph with cutoff. + let tips = if let Some(cutoff) = cutoff_date { + let tips = Cow::Owned(queue.clone()); + // color all their parents up to the cutoff date, the oldest commit we know the server has. + mark_recent_complete_commits(&mut queue, graph, cutoff)?; + tips + } else { + Cow::Borrowed(&queue) + }; + + // mark all complete advertised refs as common refs. + for mapping in ref_map + .mappings + .iter() + .zip(remote_ref_target_known.iter().copied()) + // We need this filter as the graph wouldn't contain annotated tags. + .filter_map(|(mapping, known)| (!known).then_some(mapping)) + { + let want_id = mapping.remote.as_id(); + if let Some(common_id) = want_id + .and_then(|id| graph.get(id).map(|c| (c, id))) + .filter(|(c, _)| c.data.flags.contains(Flags::COMPLETE)) + .map(|(_, id)| id) + { + negotiator.known_common(common_id.into(), graph)?; + } + } + + // As negotiators currently may rely on getting `known_common` calls first and tips after, we adhere to that which is the only + // reason we cached the set of tips. 
+ for tip in tips.iter_unordered() { + negotiator.add_tip(*tip, graph)?; + } + + Ok(Action::MustNegotiate { + remote_ref_target_known, + }) +} + +/// Add all `wants` to `arguments`, which is the unpeeled direct target that the advertised remote ref points to. +pub(crate) fn add_wants( + repo: &crate::Repository, + arguments: &mut gix_protocol::fetch::Arguments, + ref_map: &fetch::RefMap, + mapping_known: &[bool], + shallow: &fetch::Shallow, + fetch_tags: fetch::Tags, +) { + // With included tags, we have to keep mappings of tags to handle them later when updating refs, but we don't want to + // explicitly `want` them as the server will determine by itself which tags are pointing to a commit it wants to send. + // If we would not exclude implicit tag mappings like this, we would get too much of the graph. + let tag_refspec_to_ignore = matches!(fetch_tags, crate::remote::fetch::Tags::Included) + .then(|| fetch_tags.to_refspec()) + .flatten(); + + // When using shallow, we can't exclude `wants` as the remote won't send anything then. Thus we have to resend everything + // we have as want instead to get exactly the same graph, but possibly deepened. + let is_shallow = !matches!(shallow, fetch::Shallow::NoChange); + let wants = ref_map + .mappings + .iter() + .zip(mapping_known) + .filter_map(|(m, known)| (is_shallow || !*known).then_some(m)); + for want in wants { + // Here we ignore implicit tag mappings if needed. + if tag_refspec_to_ignore.map_or(false, |tag_spec| { + want.spec_index + .implicit_index() + .and_then(|idx| ref_map.extra_refspecs.get(idx)) + .map_or(false, |spec| spec.to_ref() == tag_spec) + }) { + continue; + } + let id_on_remote = want.remote.as_id(); + if !arguments.can_use_ref_in_want() || matches!(want.remote, fetch::Source::ObjectId(_)) { + if let Some(id) = id_on_remote { + arguments.want(id); } + } else { + arguments.want_ref( + want.remote + .as_name() + .expect("name available if this isn't an object id"), + ) + } + let id_is_annotated_tag_we_have = id_on_remote + .and_then(|id| repo.objects.header(id).ok().map(|h| (id, h))) + .filter(|(_, h)| h.kind() == gix_object::Kind::Tag) + .map(|(id, _)| id); + if let Some(tag_on_remote) = id_is_annotated_tag_we_have { + // Annotated tags are not handled at all by negotiators in the commit-graph - they only see commits and thus won't + // ever add `have`s for tags. To correct for that, we add these haves here to avoid getting them sent again. + arguments.have(tag_on_remote) + } + } +} - if has_missing_tracking_branch || (shallow.is_some() && arguments.is_empty()) { - if let Ok(Some(r)) = repo.head_ref() { - if let Some(id) = r.target().try_id() { - arguments.have(id); - arguments.want(id); +/// Remove all commits that are more recent than the cut-off, which is the commit time of the oldest common commit we have with the server. +fn mark_recent_complete_commits( + queue: &mut Queue, + graph: &mut gix_negotiate::Graph<'_>, + cutoff: gix_revision::graph::CommitterTimestamp, +) -> Result<(), Error> { + while let Some(id) = queue + .peek() + .and_then(|(commit_time, id)| (commit_time >= &cutoff).then_some(*id)) + { + queue.pop(); + let commit = graph.get(&id).expect("definitely set when adding tips or parents"); + for parent_id in commit.parents.clone() { + let mut was_complete = false; + if let Some(parent) = graph + .try_lookup_or_insert_commit(parent_id, |md| { + was_complete = md.flags.contains(Flags::COMPLETE); + md.flags |= Flags::COMPLETE + })? 
+ .filter(|_| !was_complete) + { + queue.insert(parent.commit_time, parent_id) + } + } + } + Ok(()) +} + +fn mark_all_refs_in_repo( + repo: &crate::Repository, + graph: &mut gix_negotiate::Graph<'_>, + queue: &mut Queue, + mark: Flags, +) -> Result<(), Error> { + for local_ref in repo.references()?.all()?.peeled() { + let local_ref = local_ref?; + let id = local_ref.id().detach(); + let mut is_complete = false; + if let Some(commit) = graph + .try_lookup_or_insert_commit(id, |md| { + is_complete = md.flags.contains(Flags::COMPLETE); + md.flags |= mark + })? + .filter(|_| !is_complete) + { + queue.insert(commit.commit_time, id); + }; + } + Ok(()) +} + +fn mark_alternate_complete( + repo: &crate::Repository, + graph: &mut gix_negotiate::Graph<'_>, + queue: &mut Queue, +) -> Result<(), Error> { + for alternate_repo in repo + .objects + .store_ref() + .alternate_db_paths()? + .into_iter() + .filter_map(|path| { + path.ancestors() + .nth(1) + .and_then(|git_dir| crate::open_opts(git_dir, repo.options.clone()).ok()) + }) + { + mark_all_refs_in_repo(&alternate_repo, graph, queue, Flags::ALTERNATE | Flags::COMPLETE)?; + } + Ok(()) +} + +/// Negotiate the nth `round` with `negotiator` sending `haves_to_send` after possibly making the known common commits +/// as sent by the remote known to `negotiator` using `previous_response` if this isn't the first round. +/// All `haves` are added to `arguments` accordingly. +/// Returns the amount of haves actually sent. +pub(crate) fn one_round( + negotiator: &mut dyn gix_negotiate::Negotiator, + graph: &mut gix_negotiate::Graph<'_>, + haves_to_send: usize, + arguments: &mut gix_protocol::fetch::Arguments, + previous_response: Option<&gix_protocol::fetch::Response>, + mut common: Option<&mut Vec>, +) -> Result<(usize, bool), Error> { + let mut seen_ack = false; + if let Some(response) = previous_response { + use gix_protocol::fetch::response::Acknowledgement; + for ack in response.acknowledgements() { + match ack { + Acknowledgement::Common(id) => { + seen_ack = true; + negotiator.in_common_with_remote(*id, graph)?; + if let Some(ref mut common) = common { + common.push(*id); } } + Acknowledgement::Ready => { + // NOTE: In git, there is some logic dealing with whether to expect a DELIM or FLUSH package, + // but we handle this with peeking. + } + Acknowledgement::Nak => {} } - Ok(true) } } + + // `common` is set only if this is a stateless transport, and we repeat previously confirmed common commits as HAVE, because + // we are not going to repeat them otherwise. + if let Some(common) = common { + for have_id in common { + arguments.have(have_id); + } + } + + let mut haves_sent = 0; + for have_id in (0..haves_to_send).map_while(|_| negotiator.next_have(graph)) { + arguments.have(have_id?); + haves_sent += 1; + } + // Note that we are differing from the git implementation, which does an extra-round of with no new haves sent at all. + // For us it seems better to just say we are done when we know we are done, as potentially additional acks won't affect the + // queue of any of our implementation at all (so the negotiator won't come up with more haves next time either). 
+ Ok((haves_sent, seen_ack)) } diff --git a/gix/src/remote/connection/fetch/receive_pack.rs b/gix/src/remote/connection/fetch/receive_pack.rs index c7dffd892b..33b4cb302f 100644 --- a/gix/src/remote/connection/fetch/receive_pack.rs +++ b/gix/src/remote/connection/fetch/receive_pack.rs @@ -1,5 +1,7 @@ +use std::ops::DerefMut; use std::sync::atomic::{AtomicBool, Ordering}; +use gix_odb::store::RefreshMode; use gix_odb::FindExt; use gix_protocol::{ fetch::Arguments, @@ -102,9 +104,6 @@ where } let (shallow_commits, mut shallow_lock) = add_shallow_args(&mut arguments, &self.shallow, repo)?; - let mut previous_response = None::; - let mut round = 1; - if self.ref_map.object_hash != repo.object_hash() { return Err(Error::IncompatibleObjectHash { local: repo.object_hash(), @@ -112,127 +111,161 @@ where }); } - let algorithm = repo + let mut negotiator = repo .config .resolved .string_by_key(Fetch::NEGOTIATION_ALGORITHM.logical_name().as_str()) .map(|n| Fetch::NEGOTIATION_ALGORITHM.try_into_negotiation_algorithm(n)) .transpose() .with_leniency(repo.config.lenient_config)? - .unwrap_or(Algorithm::Naive); // TODO: use the default instead once consecutive is implemented - - let reader = 'negotiation: loop { - progress.step(); - progress.set_name(format!("negotiate (round {round})")); - - let is_done = match negotiate::one_round( - algorithm, - round, - repo, - &self.ref_map, - con.remote.fetch_tags, - &mut arguments, - previous_response.as_ref(), - (self.shallow != Shallow::NoChange).then_some(&self.shallow), - ) { - Ok(_) if arguments.is_empty() => { - gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); - let update_refs = refs::update( - repo, - self.reflog_message - .take() - .unwrap_or_else(|| RefLogMessage::Prefixed { action: "fetch".into() }), - &self.ref_map.mappings, - con.remote.refspecs(remote::Direction::Fetch), - &self.ref_map.extra_refspecs, - con.remote.fetch_tags, - self.dry_run, - self.write_packed_refs, - )?; - return Ok(Outcome { - ref_map: std::mem::take(&mut self.ref_map), - status: Status::NoPackReceived { update_refs }, - }); - } - Ok(is_done) => is_done, - Err(err) => { - gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); - return Err(err.into()); - } - }; - round += 1; - let mut reader = arguments.send(&mut con.transport, is_done).await?; - if sideband_all { - setup_remote_progress(progress, &mut reader, should_interrupt); - } - let response = gix_protocol::fetch::Response::from_line_reader(protocol_version, &mut reader).await?; - if response.has_pack() { - progress.step(); - progress.set_name("receiving pack"); - if !sideband_all { - setup_remote_progress(progress, &mut reader, should_interrupt); - } - previous_response = Some(response); - break 'negotiation reader; - } else { - previous_response = Some(response); - } + .unwrap_or(Algorithm::Consecutive) + .into_negotiator(); + let graph_repo = { + let mut r = repo.clone(); + // assure that checking for unknown server refs doesn't trigger ODB refreshes. + r.objects.refresh = RefreshMode::Never; + // we cache everything of importance in the graph and thus don't need an object cache. 
+ r.objects.unset_object_cache(); + r }; - let previous_response = previous_response.expect("knowledge of a pack means a response was received"); - if !previous_response.shallow_updates().is_empty() && shallow_lock.is_none() { - let reject_shallow_remote = repo - .config - .resolved - .boolean_filter_by_key("clone.rejectShallow", &mut repo.filter_config_section()) - .map(|val| Clone::REJECT_SHALLOW.enrich_error(val)) - .transpose()? - .unwrap_or(false); - if reject_shallow_remote { - return Err(Error::RejectShallowRemote); + let mut graph = graph_repo.commit_graph(); + let action = negotiate::mark_complete_and_common_ref( + &graph_repo, + negotiator.deref_mut(), + &mut graph, + &self.ref_map, + &self.shallow, + )?; + let mut previous_response = None::; + let mut round = 1; + let mut write_pack_bundle = match &action { + negotiate::Action::NoChange | negotiate::Action::SkipToRefUpdate => { + gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); + None } - shallow_lock = acquire_shallow_lock(repo).map(Some)?; - } + negotiate::Action::MustNegotiate { + remote_ref_target_known, + } => { + negotiate::add_wants( + repo, + &mut arguments, + &self.ref_map, + remote_ref_target_known, + &self.shallow, + con.remote.fetch_tags, + ); + let is_stateless = + arguments.is_stateless(!con.transport.connection_persists_across_multiple_requests()); + let mut haves_to_send = gix_negotiate::window_size(is_stateless, None); + let mut seen_ack = false; + let mut in_vain = 0; + let mut common = is_stateless.then(Vec::new); + let reader = 'negotiation: loop { + progress.step(); + progress.set_name(format!("negotiate (round {round})")); - let options = gix_pack::bundle::write::Options { - thread_limit: config::index_threads(repo)?, - index_version: config::pack_index_version(repo)?, - iteration_mode: gix_pack::data::input::Mode::Verify, - object_hash: con.remote.repo.object_hash(), - }; + let is_done = match negotiate::one_round( + negotiator.deref_mut(), + &mut graph, + haves_to_send, + &mut arguments, + previous_response.as_ref(), + common.as_mut(), + ) { + Ok((haves_sent, ack_seen)) => { + if ack_seen { + in_vain = 0; + } + seen_ack |= ack_seen; + in_vain += haves_sent; + let is_done = haves_sent != haves_to_send || (seen_ack && in_vain >= 256); + haves_to_send = gix_negotiate::window_size(is_stateless, haves_to_send); + is_done + } + Err(err) => { + gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); + return Err(err.into()); + } + }; + let mut reader = arguments.send(&mut con.transport, is_done).await?; + if sideband_all { + setup_remote_progress(progress, &mut reader, should_interrupt); + } + let response = + gix_protocol::fetch::Response::from_line_reader(protocol_version, &mut reader, is_done).await?; + let has_pack = response.has_pack(); + previous_response = Some(response); + if has_pack { + progress.step(); + progress.set_name("receiving pack"); + if !sideband_all { + setup_remote_progress(progress, &mut reader, should_interrupt); + } + break 'negotiation reader; + } else { + round += 1; + } + }; + drop(graph); + drop(graph_repo); + let previous_response = previous_response.expect("knowledge of a pack means a response was received"); + if !previous_response.shallow_updates().is_empty() && shallow_lock.is_none() { + let reject_shallow_remote = repo + .config + .resolved + .boolean_filter_by_key("clone.rejectShallow", &mut repo.filter_config_section()) + .map(|val| Clone::REJECT_SHALLOW.enrich_error(val)) + .transpose()? 
+ .unwrap_or(false); + if reject_shallow_remote { + return Err(Error::RejectShallowRemote); + } + shallow_lock = acquire_shallow_lock(repo).map(Some)?; + } - let mut write_pack_bundle = if matches!(self.dry_run, fetch::DryRun::No) { - Some(gix_pack::Bundle::write_to_directory( - #[cfg(feature = "async-network-client")] - { - gix_protocol::futures_lite::io::BlockOn::new(reader) - }, - #[cfg(not(feature = "async-network-client"))] - { - reader - }, - Some(repo.objects.store_ref().path().join("pack")), - progress, - should_interrupt, - Some(Box::new({ - let repo = repo.clone(); - move |oid, buf| repo.objects.find(oid, buf).ok() - })), - options, - )?) - } else { - drop(reader); - None - }; + let options = gix_pack::bundle::write::Options { + thread_limit: config::index_threads(repo)?, + index_version: config::pack_index_version(repo)?, + iteration_mode: gix_pack::data::input::Mode::Verify, + object_hash: con.remote.repo.object_hash(), + }; - if matches!(protocol_version, gix_protocol::transport::Protocol::V2) { - gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); - } + let write_pack_bundle = if matches!(self.dry_run, fetch::DryRun::No) { + Some(gix_pack::Bundle::write_to_directory( + #[cfg(feature = "async-network-client")] + { + gix_protocol::futures_lite::io::BlockOn::new(reader) + }, + #[cfg(not(feature = "async-network-client"))] + { + reader + }, + Some(repo.objects.store_ref().path().join("pack")), + progress, + should_interrupt, + Some(Box::new({ + let repo = repo.clone(); + move |oid, buf| repo.objects.find(oid, buf).ok() + })), + options, + )?) + } else { + drop(reader); + None + }; + + if matches!(protocol_version, gix_protocol::transport::Protocol::V2) { + gix_protocol::indicate_end_of_interaction(&mut con.transport).await.ok(); + } - if let Some(shallow_lock) = shallow_lock { - if !previous_response.shallow_updates().is_empty() { - crate::shallow::write(shallow_lock, shallow_commits, previous_response.shallow_updates())?; + if let Some(shallow_lock) = shallow_lock { + if !previous_response.shallow_updates().is_empty() { + crate::shallow::write(shallow_lock, shallow_commits, previous_response.shallow_updates())?; + } + } + write_pack_bundle } - } + }; let update_refs = refs::update( repo, @@ -255,16 +288,26 @@ where } } - Ok(Outcome { + let out = Outcome { ref_map: std::mem::take(&mut self.ref_map), - status: match write_pack_bundle { - Some(write_pack_bundle) => Status::Change { - write_pack_bundle, + status: if matches!(self.dry_run, fetch::DryRun::Yes) { + assert!(write_pack_bundle.is_none(), "in dry run we never read a bundle"); + Status::DryRun { update_refs, - }, - None => Status::DryRun { update_refs }, + negotiation_rounds: round, + } + } else { + match write_pack_bundle { + Some(write_pack_bundle) => Status::Change { + write_pack_bundle, + update_refs, + negotiation_rounds: round, + }, + None => Status::NoPackReceived { update_refs }, + } }, - }) + }; + Ok(out) } } diff --git a/gix/src/remote/fetch.rs b/gix/src/remote/fetch.rs index c6190afc38..c109558f18 100644 --- a/gix/src/remote/fetch.rs +++ b/gix/src/remote/fetch.rs @@ -1,24 +1,14 @@ /// pub mod negotiate { - /// The way the negotiation is performed. - #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] - pub enum Algorithm { - /// Do not send any information at all, likely at cost of larger-than-necessary packs. - Noop, - /// Walk over consecutive commits and check each one. This can be costly be assures packs are exactly the size they need to be. 
- #[default] - Consecutive, - /// Like `Consecutive`, but skips commits to converge faster, at the cost of receiving packs that are larger than they have to be. - Skipping, - /// Our very own implementation that probably should be replaced by one of the known algorithms soon. - Naive, - } + pub use gix_negotiate::Algorithm; #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] pub use super::super::connection::fetch::negotiate::Error; #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] - pub(crate) use super::super::connection::fetch::negotiate::one_round; + pub(crate) use super::super::connection::fetch::negotiate::{ + add_wants, mark_complete_and_common_ref, one_round, Action, + }; } #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] @@ -154,7 +144,7 @@ pub enum Source { #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] impl Source { /// Return either the direct object id we refer to or the direct target that a reference refers to. - /// The latter may be a direct or a symbolic reference, and we degenerate this to the peeled object id. + /// The latter may be a direct or a symbolic reference. /// If unborn, `None` is returned. pub fn as_id(&self) -> Option<&gix_hash::oid> { match self { @@ -163,6 +153,17 @@ impl Source { } } + /// Returns the peeled id of this instance, that is the object that can't be de-referenced anymore. + pub fn peeled_id(&self) -> Option<&gix_hash::oid> { + match self { + Source::ObjectId(id) => Some(id), + Source::Ref(r) => { + let (_name, target, peeled) = r.unpack(); + peeled.or(target) + } + } + } + /// Return ourselves as the full name of the reference we represent, or `None` if this source isn't a reference but an object. pub fn as_name(&self) -> Option<&crate::bstr::BStr> { match self { diff --git a/gix/src/repository/graph.rs b/gix/src/repository/graph.rs new file mode 100644 index 0000000000..a1f6c7f89a --- /dev/null +++ b/gix/src/repository/graph.rs @@ -0,0 +1,24 @@ +use gix_odb::Find; + +impl crate::Repository { + /// Create a graph data-structure capable of accelerating graph traversals and storing state of type `T` with each commit + /// it encountered. + /// + /// Note that the commitgraph will be used if it is present and readable, but it won't be an error if it is corrupted. In that case, + /// it will just not be used. + /// + /// ### Performance + /// + /// Note that the [Graph][gix_revision::Graph] can be sensitive to various object database settings that may affect the performance + /// of the commit walk. 
+ pub fn commit_graph(&self) -> gix_revision::Graph<'_, T> { + gix_revision::Graph::new( + |id, buf| { + self.objects + .try_find(id, buf) + .map(|r| r.and_then(|d| d.try_into_commit_iter())) + }, + gix_commitgraph::at(self.objects.store_ref().path().join("info")).ok(), + ) + } +} diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index 7248bae17e..f8a51e8d05 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -37,6 +37,7 @@ mod attributes; mod cache; mod config; mod excludes; +mod graph; pub(crate) mod identity; mod impls; mod init; diff --git a/gix/tests/clone/mod.rs b/gix/tests/clone/mod.rs index ca12d29a9d..a8af782de4 100644 --- a/gix/tests/clone/mod.rs +++ b/gix/tests/clone/mod.rs @@ -480,50 +480,52 @@ mod blocking_io { #[test] fn fetch_and_checkout_empty_remote_repo() -> crate::Result { - let tmp = gix_testtools::tempfile::TempDir::new()?; - let mut prepare = gix::clone::PrepareFetch::new( - gix_testtools::scripted_fixture_read_only("make_empty_repo.sh")?, - tmp.path(), - gix::create::Kind::WithWorktree, - Default::default(), - restricted(), - )?; - let (mut checkout, out) = prepare - .fetch_then_checkout(gix::progress::Discard, &std::sync::atomic::AtomicBool::default()) - .unwrap(); - let (repo, _) = checkout.main_worktree(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?; - - assert!(!repo.index_path().is_file(), "newly initialized repos have no index"); - let head = repo.head()?; - assert!(head.is_unborn()); - - assert!( - head.log_iter().all()?.is_none(), - "no reflog for unborn heads (as it needs non-null destination hash)" - ); - - if out - .ref_map - .handshake - .capabilities - .capability("ls-refs") - .expect("has ls-refs") - .supports("unborn") - == Some(true) - { - assert_eq!( - head.referent_name().expect("present").as_bstr(), - "refs/heads/special", - "we pick up the name as present on the server, not the one we default to" - ); - } else { - assert_eq!( - head.referent_name().expect("present").as_bstr(), - "refs/heads/main", - "we simply keep our own post-init HEAD which defaults to the branch name we configured locally" + for version in [ + gix::protocol::transport::Protocol::V0, + gix::protocol::transport::Protocol::V2, + ] { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut prepare = gix::clone::PrepareFetch::new( + gix_testtools::scripted_fixture_read_only("make_empty_repo.sh")?, + tmp.path(), + gix::create::Kind::WithWorktree, + Default::default(), + restricted().config_overrides(Some(format!("protocol.version={}", version as u8))), + )?; + let (mut checkout, out) = + prepare.fetch_then_checkout(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?; + let (repo, _) = + checkout.main_worktree(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?; + + assert!(!repo.index_path().is_file(), "newly initialized repos have no index"); + let head = repo.head()?; + assert!(head.is_unborn()); + + assert!( + head.log_iter().all()?.is_none(), + "no reflog for unborn heads (as it needs non-null destination hash)" ); - } + let supports_unborn = out + .ref_map + .handshake + .capabilities + .capability("ls-refs") + .map_or(false, |cap| cap.supports("unborn").unwrap_or(false)); + if supports_unborn { + assert_eq!( + head.referent_name().expect("present").as_bstr(), + "refs/heads/special", + "we pick up the name as present on the server, not the one we default to" + ); + } else { + assert_eq!( + head.referent_name().expect("present").as_bstr(), + "refs/heads/main", + "we simply keep our 
own post-init HEAD which defaults to the branch name we configured locally" + ); + } + } Ok(()) } diff --git a/gix/tests/config/tree.rs b/gix/tests/config/tree.rs index ddbcf42d41..be69d428d4 100644 --- a/gix/tests/config/tree.rs +++ b/gix/tests/config/tree.rs @@ -68,6 +68,34 @@ mod keys { .validate("https://github.com/byron/gitoxide".into()) .is_ok()); } + + #[test] + fn unsigned_integer() { + for valid in [0, 1, 100_124] { + assert!(gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT + .validate(valid.to_string().as_bytes().into()) + .is_ok()); + } + + for invalid in [-1, -100] { + assert_eq!( + gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT + .validate(invalid.to_string().as_str().into()) + .unwrap_err() + .to_string(), + "cannot use sign for unsigned integer" + ); + } + + let out_of_bounds = ((i64::MAX as u64) + 1).to_string(); + assert_eq!( + gix::config::tree::Core::DELTA_BASE_CACHE_LIMIT + .validate(out_of_bounds.as_bytes().into()) + .unwrap_err() + .to_string(), + "Could not decode '9223372036854775808': Integers needs to be positive or negative numbers which may have a suffix like 1k, 42, or 50G" + ); + } } mod branch { @@ -482,17 +510,16 @@ mod pack { } } -#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] mod protocol { - use gix::{ - config::tree::{protocol, Key, Protocol}, - remote::url::scheme_permission::Allow, - }; - - use crate::config::tree::bcow; + use gix::config::tree::{Key, Protocol}; + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] #[test] fn allow() -> crate::Result { + use crate::config::tree::bcow; + use gix::config::tree::protocol; + use gix::remote::url::scheme_permission::Allow; + for (key, protocol_name_parameter) in [ (&Protocol::ALLOW, None), (&protocol::NameParameter::ALLOW, Some("http")), @@ -518,6 +545,43 @@ mod protocol { } Ok(()) } + + #[test] + fn version() { + for valid in [0, 1, 2] { + assert!(Protocol::VERSION.validate(valid.to_string().as_str().into()).is_ok()); + } + + assert_eq!( + Protocol::VERSION.validate("5".into()).unwrap_err().to_string(), + "protocol version 5 is unknown" + ); + + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] + { + for (valid, expected) in [ + (None, gix_protocol::transport::Protocol::V2), + (Some(0), gix_protocol::transport::Protocol::V0), + (Some(1), gix_protocol::transport::Protocol::V1), + (Some(2), gix_protocol::transport::Protocol::V2), + ] { + assert_eq!( + Protocol::VERSION + .try_into_protocol_version(valid.map(Ok)) + .expect("valid version"), + expected + ); + } + + assert_eq!( + Protocol::VERSION + .try_into_protocol_version(Some(Ok(5))) + .unwrap_err() + .to_string(), + "The key \"protocol.version=5\" was invalid" + ); + } + } } mod gitoxide { diff --git a/gix/tests/fixtures/make_remote_repos.sh b/gix/tests/fixtures/make_remote_repos.sh index 85d2775d7b..76cd2e4eb8 100644 --- a/gix/tests/fixtures/make_remote_repos.sh +++ b/gix/tests/fixtures/make_remote_repos.sh @@ -308,3 +308,45 @@ git clone --shared base detached-head (cd detached-head git checkout @~1 ) + +function commit() { + local message=${1:?first argument is the commit message} + local file="$message.t" + echo "$1" > "$file" + git add -- "$file" + tick + git commit -m "$message" + git tag "$message" +} + +function optimize_repo() { + git commit-graph write --no-progress --reachable + git repack -adq +} + +(mkdir multi_round && cd multi_round + git init -q server && cd server + commit to_fetch + cd .. 
+ + git init -q client && cd client + for i in $(seq 8); do + git checkout --orphan b$i && + commit b$i.c0 + done + + for j in $(seq 19); do + for i in $(seq 8); do + git checkout b$i && + commit b$i.c$j + done + done + optimize_repo + cd .. + (cd server + git fetch --no-tags "$PWD/../client" b1:refs/heads/b1 + git checkout b1 + commit commit-on-b1 + optimize_repo + ) +) diff --git a/gix/tests/remote/fetch.rs b/gix/tests/remote/fetch.rs index 3c4aaec8c2..1c640b114c 100644 --- a/gix/tests/remote/fetch.rs +++ b/gix/tests/remote/fetch.rs @@ -12,12 +12,15 @@ mod shallow { #[cfg(any(feature = "blocking-network-client", feature = "async-network-client-async-std"))] mod blocking_and_async_io { + use gix::config::tree::Key; use std::sync::atomic::AtomicBool; use gix::remote::{fetch, fetch::Status, Direction::Fetch}; use gix_features::progress; use gix_protocol::maybe_async; + use gix_testtools::tempfile::TempDir; + use crate::remote; use crate::{ remote::{into_daemon_remote_if_async, spawn_git_daemon_if_async}, util::hex_to_id, @@ -106,6 +109,90 @@ mod blocking_and_async_io { Ok(()) } + #[maybe_async::test( + feature = "blocking-network-client", + async(feature = "async-network-client-async-std", async_std::test) + )] + async fn fetch_with_multi_round_negotiation() -> crate::Result { + for (algorithm, expected_negotiation_rounds) in [ + (gix::negotiate::Algorithm::Consecutive, 4), + (gix::negotiate::Algorithm::Skipping, 2), + ] { + for version in [ + gix::protocol::transport::Protocol::V1, + gix::protocol::transport::Protocol::V2, + ] { + let (mut client_repo, _tmp) = { + let client_repo = remote::repo("multi_round/client"); + let daemon = spawn_git_daemon_if_async(client_repo.work_dir().expect("non-bare"))?; + let tmp = TempDir::new()?; + let repo = gix::prepare_clone_bare( + daemon.as_ref().map_or_else( + || client_repo.git_dir().to_owned(), + |d| std::path::PathBuf::from(format!("{}/", d.url)), + ), + tmp.path(), + )? + .fetch_only(gix::progress::Discard, &std::sync::atomic::AtomicBool::default()) + .await? + .0; + (repo, tmp) + }; + + { + let mut config = client_repo.config_snapshot_mut(); + config.set_raw_value( + gix::config::tree::Protocol::VERSION.section().name(), + None, + gix::config::tree::Protocol::VERSION.name(), + (version as u8).to_string().as_str(), + )?; + config.set_raw_value( + gix::config::tree::Fetch::NEGOTIATION_ALGORITHM.section().name(), + None, + gix::config::tree::Fetch::NEGOTIATION_ALGORITHM.name(), + algorithm.to_string().as_str(), + )?; + } + let server_repo = remote::repo("multi_round/server"); + let daemon = spawn_git_daemon_if_async(server_repo.work_dir().expect("non-bare"))?; + let remote = into_daemon_remote_if_async( + client_repo.remote_at(server_repo.work_dir().expect("non-bare"))?, + daemon.as_ref(), + None, + ); + let changes = remote + .with_refspecs(Some("refs/heads/*:refs/remotes/origin/*"), Fetch)? + .connect(Fetch) + .await? + .prepare_fetch(gix::progress::Discard, Default::default()) + .await? + .receive(gix::progress::Discard, &AtomicBool::default()) + .await?; + + match changes.status { + Status::Change { + write_pack_bundle, + negotiation_rounds, + .. + } => { + assert_eq!( + negotiation_rounds, expected_negotiation_rounds, + "we need multiple rounds" + ); + // the server only has our `b1` and an extra commit or two. 
+ assert_eq!( + write_pack_bundle.index.num_objects, 7, + "this is the number git gets as well, we are quite perfectly aligned :)" + ); + } + _ => unreachable!("We expect a pack for sure"), + } + } + } + Ok(()) + } + #[maybe_async::test( feature = "blocking-network-client", async(feature = "async-network-client-async-std", async_std::test) @@ -232,15 +319,12 @@ mod blocking_and_async_io { .await?; match res.status { - fetch::Status::Change { write_pack_bundle, update_refs } => { - assert_eq!(write_pack_bundle.index.data_hash, hex_to_id("029d08823bd8a8eab510ad6ac75c823cfd3ed31e")); - assert_eq!(write_pack_bundle.index.num_objects, 0, "empty pack"); - assert!(write_pack_bundle.data_path.as_deref().map_or(false, |p| p.is_file())); - assert!(write_pack_bundle.index_path.as_deref().map_or(false, |p| p.is_file())); + fetch::Status::NoPackReceived { update_refs } => { assert_eq!(update_refs.edits.len(), expected_ref_count); - assert!(!write_pack_bundle.keep_path.as_deref().map_or(false, |p| p.is_file()), ".keep files are deleted if at least one ref-edit was made or the pack is empty"); - }, - _ => unreachable!("Naive negotiation sends the same have and wants, resulting in an empty pack (technically no change, but we don't detect it) - empty packs are fine") + } + _ => unreachable!( + "default negotiation is able to realize nothing is required and doesn't get to receiving a pack" + ), } } Ok(()) @@ -290,7 +374,8 @@ mod blocking_and_async_io { .await?; match res.status { - gix::remote::fetch::Status::Change { write_pack_bundle, update_refs } => { + gix::remote::fetch::Status::Change { write_pack_bundle, update_refs, negotiation_rounds } => { + assert_eq!(negotiation_rounds, 1); assert_eq!(write_pack_bundle.index.data_hash, hex_to_id(expected_data_hash), ); assert_eq!(write_pack_bundle.index.num_objects, 3 + num_objects_offset, "{fetch_tags:?}"); assert!(write_pack_bundle.data_path.as_deref().map_or(false, |p| p.is_file())); @@ -373,7 +458,9 @@ mod blocking_and_async_io { fetch::Status::Change { write_pack_bundle, update_refs, + negotiation_rounds, } => { + assert_eq!(negotiation_rounds, 1); assert_eq!(write_pack_bundle.pack_version, gix::odb::pack::data::Version::V2); assert_eq!(write_pack_bundle.object_hash, repo.object_hash()); assert_eq!(write_pack_bundle.index.num_objects, 4, "{dry_run}: this value is 4 when git does it with 'consecutive' negotiation style, but could be 33 if completely naive."); @@ -409,8 +496,16 @@ mod blocking_and_async_io { update_refs } - fetch::Status::DryRun { update_refs } => update_refs, - fetch::Status::NoPackReceived { .. } => unreachable!("we firmly expect changes here"), + fetch::Status::DryRun { + update_refs, + negotiation_rounds, + } => { + assert_eq!(negotiation_rounds, 1); + update_refs + } + fetch::Status::NoPackReceived { .. 
} => { + unreachable!("we firmly expect changes here, as the other origin has changes") + } }; assert_eq!( From ae1bc41817bec3b83fe65104e7e3efe4bd798a78 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jun 2023 16:30:47 +0200 Subject: [PATCH 15/16] Add test to validate alternates in the context of fetching --- gix/tests/remote/fetch.rs | 47 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/gix/tests/remote/fetch.rs b/gix/tests/remote/fetch.rs index 1c640b114c..a24fe30d77 100644 --- a/gix/tests/remote/fetch.rs +++ b/gix/tests/remote/fetch.rs @@ -109,6 +109,53 @@ mod blocking_and_async_io { Ok(()) } + #[test] + #[cfg(feature = "blocking-network-client")] + fn fetch_with_alternates_adds_tips_from_alternates() -> crate::Result<()> { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let remote_repo = remote::repo("base"); + let (_repo, out) = gix::clone::PrepareFetch::new( + remote::repo("multi_round/server").path(), + tmp.path(), + gix::create::Kind::Bare, + Default::default(), + gix::open::Options::isolated(), + )? + .configure_remote({ + move |r| { + std::fs::write( + r.repo().objects.store_ref().path().join("info").join("alternates"), + format!( + "{}\n", + gix::path::realpath(remote_repo.objects.store_ref().path())?.display() + ) + .as_bytes(), + )?; + Ok(r) + } + }) + .fetch_only(gix::progress::Discard, &AtomicBool::default())?; + + match out.status { + Status::Change { + negotiation_rounds, + write_pack_bundle, + .. + } => { + assert_eq!( + negotiation_rounds, 1, + "we don't really have a way to see that tips from alternates were added, I think" + ); + assert_eq!( + write_pack_bundle.index.num_objects, 66, + "this test just exercises code for adding alternate-repo tips to the negotiator" + ); + } + _ => unreachable!("we get a pack as alternates are unrelated"), + } + Ok(()) + } + #[maybe_async::test( feature = "blocking-network-client", async(feature = "async-network-client-async-std", async_std::test) From 7983f6fd4ef242d494962fe35b8b8f91a6135f32 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jun 2023 20:59:45 +0200 Subject: [PATCH 16/16] adapt to changes in `gix` --- gitoxide-core/src/repository/clone.rs | 16 ++++++++++++-- gitoxide-core/src/repository/fetch.rs | 30 ++++++++++++++++++++++++--- src/plumbing/progress.rs | 4 ++++ 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/gitoxide-core/src/repository/clone.rs b/gitoxide-core/src/repository/clone.rs index b92e0d9f60..3a29fd7138 100644 --- a/gitoxide-core/src/repository/clone.rs +++ b/gitoxide-core/src/repository/clone.rs @@ -92,12 +92,24 @@ pub(crate) mod function { writeln!(err, "The cloned repository appears to be empty")?; } Status::DryRun { .. } => unreachable!("dry-run unsupported"), - Status::Change { update_refs, .. } => { + Status::Change { + update_refs, + negotiation_rounds, + .. 
+ } => { let remote = repo .find_default_remote(gix::remote::Direction::Fetch) .expect("one origin remote")?; let ref_specs = remote.refspecs(gix::remote::Direction::Fetch); - print_updates(&repo, update_refs, ref_specs, fetch_outcome.ref_map, &mut out, &mut err)?; + print_updates( + &repo, + negotiation_rounds, + update_refs, + ref_specs, + fetch_outcome.ref_map, + &mut out, + &mut err, + )?; } }; diff --git a/gitoxide-core/src/repository/fetch.rs b/gitoxide-core/src/repository/fetch.rs index 3ee8459a87..d2139b9415 100644 --- a/gitoxide-core/src/repository/fetch.rs +++ b/gitoxide-core/src/repository/fetch.rs @@ -63,14 +63,34 @@ pub(crate) mod function { let ref_specs = remote.refspecs(gix::remote::Direction::Fetch); match res.status { Status::NoPackReceived { update_refs } => { - print_updates(&repo, update_refs, ref_specs, res.ref_map, &mut out, err) + print_updates(&repo, 1, update_refs, ref_specs, res.ref_map, &mut out, err) } - Status::DryRun { update_refs } => print_updates(&repo, update_refs, ref_specs, res.ref_map, &mut out, err), + Status::DryRun { + update_refs, + negotiation_rounds, + } => print_updates( + &repo, + negotiation_rounds, + update_refs, + ref_specs, + res.ref_map, + &mut out, + err, + ), Status::Change { update_refs, write_pack_bundle, + negotiation_rounds, } => { - print_updates(&repo, update_refs, ref_specs, res.ref_map, &mut out, err)?; + print_updates( + &repo, + negotiation_rounds, + update_refs, + ref_specs, + res.ref_map, + &mut out, + err, + )?; if let Some(data_path) = write_pack_bundle.data_path { writeln!(out, "pack file: \"{}\"", data_path.display()).ok(); } @@ -88,6 +108,7 @@ pub(crate) mod function { pub(crate) fn print_updates( repo: &gix::Repository, + negotiation_rounds: usize, update_refs: gix::remote::fetch::refs::update::Outcome, refspecs: &[gix::refspec::RefSpec], mut map: gix::remote::fetch::RefMap, @@ -191,6 +212,9 @@ pub(crate) mod function { refspecs.len() )?; } + if negotiation_rounds != 1 { + writeln!(err, "needed {negotiation_rounds} rounds of pack-negotiation")?; + } Ok(()) } } diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 50f917c17d..659e7d11e6 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -120,6 +120,10 @@ static GIT_CONFIG: &[Record] = &[ config: "core.alternateRefsCommand", usage: NotPlanned { reason: "there is no need as we can perform the required operation in-binary. This could happen though if there is a use-case and demand." } }, + Record { + config: "core.alternateRefsPrefixes", + usage: NotPlanned { reason: "seems like a niche feature, but can be implemented if there is demand" } + }, Record { config: "core.checkRoundtripEncoding", usage: Planned { note: Some("needed once working-tree-encoding attributes are supported") }
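
Aside (illustration only, not part of the patch series): the hunks above thread a `negotiation_rounds: usize` value through `gix::remote::fetch::Status`, as exercised by the updated tests and by `print_updates` in gitoxide-core. Below is a minimal sketch of how a consumer might read that count back out of a fetch outcome, assuming one of the network-client features of `gix` is enabled; `rounds_of` is a hypothetical helper written for this note, not an API added by the series.

    use gix::remote::fetch::Status;

    /// Hypothetical helper: report how many negotiation rounds a fetch took, if the
    /// status carries that information (`Change` and `DryRun` do; `NoPackReceived` does not).
    fn rounds_of(status: &Status) -> Option<usize> {
        match status {
            Status::Change { negotiation_rounds, .. } | Status::DryRun { negotiation_rounds, .. } => {
                Some(*negotiation_rounds)
            }
            _ => None,
        }
    }
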