From 50df9633bd3474ef61ba0605afe8abde5a810f0e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 14 Dec 2024 18:17:01 -0800 Subject: [PATCH 01/58] doc: give attr.tree a bit more visibility In "git help config" output, attr.tree mentions both --attr-source and GIT_ATTR_SOURCE, but in the description of --attr-source and GIT_ATTR_SOURCE that appears in "git help git", attr.tree is missing. Add it so that these three are described together in both places. Signed-off-by: Junio C Hamano --- Documentation/git.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/git.txt b/Documentation/git.txt index 024a01df6c29c5..ec8a95dc7dab01 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -224,7 +224,10 @@ If you just want to run git as if it was started in `` then use --attr-source=:: Read gitattributes from instead of the worktree. See linkgit:gitattributes[5]. This is equivalent to setting the - `GIT_ATTR_SOURCE` environment variable. + `GIT_ATTR_SOURCE` environment variable. The `attr.tree` + configuration variable is used as a fallback when this option + or the environment variable are not in use. + GIT COMMANDS ------------ From c364310f83e726ab108cbaa7e5c1f508320e229e Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:45 -0800 Subject: [PATCH 02/58] t/t4209-log-pickaxe: Naming typo: -G takes a regex No effect on the test logic, but as the "-G" argument is a regex it is more accurate to use "regex" as a dummy argument value rather than "string". In all the other cases when "-G" is passed a dummy value it is spelled as "regex" rather than as "string". 
Signed-off-by: Junio C Hamano --- t/t4209-log-pickaxe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index a675ace0819f2d..ed70cdfc4ac538 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -89,7 +89,7 @@ test_expect_success 'usage: --no-pickaxe-regex' ' test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual && test_cmp expect actual && - test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>err && + test_expect_code 128 git log -Gregex --no-pickaxe-regex 2>err && test_cmp expect actual ' From 584a16821b6998325b5cc60b970d69cf8faea950 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:46 -0800 Subject: [PATCH 03/58] diff: -G description: Correct copy/paste error Current description for -G is incorrect, seems like it was copied from the description for -S. Signed-off-by: Junio C Hamano --- diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 0822ae443361f8..62ae60de173627 100644 --- a/diff.c +++ b/diff.c @@ -5866,7 +5866,7 @@ struct option *add_diff_options(const struct option *opts, N_("look for differences that change the number of occurrences of the specified string"), 0, diff_opt_pickaxe_string), OPT_CALLBACK_F('G', NULL, options, N_(""), - N_("look for differences that change the number of occurrences of the specified regex"), + N_("look for differences where a patch contains the specified regex"), 0, diff_opt_pickaxe_regex), OPT_BIT_F(0, "pickaxe-all", &options->pickaxe_opts, N_("show all changes in the changeset with -S or -G"), From 92757728872a98f2ef4fe04c3ea50ce39e7a6fb4 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:47 -0800 Subject: [PATCH 04/58] diff: short help: Correct -S description `-S` shows changes that modify the number of occurrences of the specified string, rather than only those that either completely remove it or add it for the first time. 
Signed-off-by: Junio C Hamano --- diff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diff.h b/diff.h index 7831ed1a2b1fd1..f7319b7caac61e 100644 --- a/diff.h +++ b/diff.h @@ -606,7 +606,7 @@ void diffcore_fix_diff_index(void); " try unchanged files as candidate for copy detection.\n" \ " -l limit rename attempts up to paths.\n" \ " -O reorder diffs according to the .\n" \ -" -S find filepair whose only one side contains the string.\n" \ +" -S find filepair who differ in the number of occurrences of string.\n" \ " --pickaxe-all\n" \ " show all files diff when -S is used and hit is found.\n" \ " -a --text treat all files as text.\n" From 38876148a5b3b346f47b4fcdc3554c26ffdb8731 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:48 -0800 Subject: [PATCH 05/58] diff: short help: Add -G and --pickaxe-grep -G and --pickaxe-grep seems to be on par with -S and --pickaxe-all that are already mentioned. Signed-off-by: Junio C Hamano --- diff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/diff.h b/diff.h index f7319b7caac61e..f5faea3ecad8df 100644 --- a/diff.h +++ b/diff.h @@ -606,9 +606,12 @@ void diffcore_fix_diff_index(void); " try unchanged files as candidate for copy detection.\n" \ " -l limit rename attempts up to paths.\n" \ " -O reorder diffs according to the .\n" \ +" -G find differences where patch contains the specified regex.\n" \ " -S find filepair who differ in the number of occurrences of string.\n" \ +" --pickaxe-grep\n" \ +" treat as a regex in the -S argument.\n" \ " --pickaxe-all\n" \ -" show all files diff when -S is used and hit is found.\n" \ +" show all files diff when -G or -S is used and hit is found.\n" \ " -a --text treat all files as text.\n" int diff_queue_is_empty(struct diff_options *o); From 3e234437defb84b809f64cba35fa174e394baeb2 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:49 -0800 Subject: [PATCH 06/58] docs: gitdiffcore: -G and -S: Use 
regex/string placeholders In the rest of the documentation (and in the code) we use `regex` and `string` as `-G` and `-S` argument placeholders. While `regular-expression` and `block-of-text` are a bit easier to read, it is a bit inconsistent. And we could assume that everyone who uses git should be able to understand that a "string" and a "block-of-text", as well as a "regex" and "regular-expression" are the same thing. So, using a shorter version is also more consistent. Signed-off-by: Junio C Hamano --- Documentation/gitdiffcore.txt | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt index 642c51227b5a0b..0d7d660ca4b67a 100644 --- a/Documentation/gitdiffcore.txt +++ b/Documentation/gitdiffcore.txt @@ -245,26 +245,25 @@ diffcore-pickaxe: For Detecting Addition/Deletion of Specified String This transformation limits the set of filepairs to those that change specified strings between the preimage and the postimage in a certain -way. -S and -G options are used to -specify different ways these strings are sought. +way. `-S` and `-G` options are used to specify +different ways these strings are sought. -"-S" detects filepairs whose preimage and postimage -have different number of occurrences of the specified block of text. +`-S` detects filepairs whose preimage and postimage +have different number of occurrences of the specified __. By definition, it will not detect in-file moves. Also, when a changeset moves a file wholesale without affecting the interesting string, diffcore-rename kicks in as usual, and `-S` omits the filepair (since the number of occurrences of that string didn't change in that rename-detected filepair). When used with `--pickaxe-regex`, treat -the as an extended POSIX regular expression to match, +the __ as an extended POSIX regular expression to match, instead of a literal string. 
-"-G" (mnemonic: grep) detects filepairs whose -textual diff has an added or a deleted line that matches the given -regular expression. This means that it will detect in-file (or what -rename-detection considers the same file) moves, which is noise. The -implementation runs diff twice and greps, and this can be quite -expensive. To speed things up, binary files without textconv filters -will be ignored. +`-G` (mnemonic: grep) detects filepairs whose textual diff has +an added or a deleted line that matches the given __. This +means that it will detect in-file (or what rename-detection considers +the same file) moves, which is noise. The implementation runs diff +twice and greps, and this can be quite expensive. To speed things up, +binary files without textconv filters will be ignored. When `-S` or `-G` are used without `--pickaxe-all`, only filepairs that match their respective criterion are kept in the output. When From 5463eeaedb2a818f0ac4d590add9808bf3249cd1 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:50 -0800 Subject: [PATCH 07/58] diff: --patch-{grep,modifies} arg names for -G and -S Most arguments have both short and long versions. Long versions are easier to read, especially in scripts and command history. This change mostly keeps existing uses of -G and -S as is in the tests, documentation and help output. Tests that check just the option parsing are duplicated to check both short and long argument options. 
Signed-off-by: Illia Bobyr Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 2 ++ Documentation/gitdiffcore.txt | 3 ++- diff.c | 12 ++++++---- diff.h | 8 +++++-- t/t4209-log-pickaxe.sh | 42 ++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 640eb6e7db58a5..07413df93785f6 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -650,6 +650,7 @@ Note that not all diffs can feature all types. For instance, copied and renamed entries cannot appear if detection for those types is disabled. `-S`:: +`--patch-modifies=`:: Look for differences that change the number of occurrences of the specified __ (i.e. addition/deletion) in a file. Intended for the scripter's use. @@ -663,6 +664,7 @@ very first version of the block. Binary files are searched as well. `-G`:: +`--patch-grep=`:: Look for differences whose patch text contains added/removed lines that match __. + diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt index 0d7d660ca4b67a..e934b97a75cf6c 100644 --- a/Documentation/gitdiffcore.txt +++ b/Documentation/gitdiffcore.txt @@ -245,7 +245,8 @@ diffcore-pickaxe: For Detecting Addition/Deletion of Specified String This transformation limits the set of filepairs to those that change specified strings between the preimage and the postimage in a certain -way. `-S` and `-G` options are used to specify +way. `--patch-modifies=` (`-S` for short) and +`--patch-grep=` (`-G` for short) are used to specify different ways these strings are sought. 
`-S` detects filepairs whose preimage and postimage diff --git a/diff.c b/diff.c index 62ae60de173627..5986728026be34 100644 --- a/diff.c +++ b/diff.c @@ -4877,15 +4877,17 @@ void diff_setup_done(struct diff_options *options) if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK)) die(_("options '%s', '%s', and '%s' cannot be used together"), - "-G", "-S", "--find-object"); + "-G/--patch-grep", "-S/--patch-modifies", "--find-object"); if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_G_REGEX_MASK)) die(_("options '%s' and '%s' cannot be used together, use '%s' with '%s'"), - "-G", "--pickaxe-regex", "--pickaxe-regex", "-S"); + "-G/--patch-grep", "--pickaxe-regex", + "--pickaxe-regex", "-S/--patch-modifies"); if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK)) die(_("options '%s' and '%s' cannot be used together, use '%s' with '%s' and '%s'"), - "--pickaxe-all", "--find-object", "--pickaxe-all", "-G", "-S"); + "--pickaxe-all", "--find-object", + "--pickaxe-all", "-G/--patch-grep", "-S/--patch-modifies"); /* * Most of the time we can say "there are changes" @@ -5862,10 +5864,10 @@ struct option *add_diff_options(const struct option *opts, OPT_SET_INT_F(0, "ita-visible-in-index", &options->ita_invisible_in_index, N_("treat 'git add -N' entries as real in the index"), 0, PARSE_OPT_NONEG), - OPT_CALLBACK_F('S', NULL, options, N_(""), + OPT_CALLBACK_F('S', "patch-modifies", options, N_(""), N_("look for differences that change the number of occurrences of the specified string"), 0, diff_opt_pickaxe_string), - OPT_CALLBACK_F('G', NULL, options, N_(""), + OPT_CALLBACK_F('G', "patch-grep", options, N_(""), N_("look for differences where a patch contains the specified regex"), 0, diff_opt_pickaxe_regex), OPT_BIT_F(0, "pickaxe-all", &options->pickaxe_opts, diff --git a/diff.h b/diff.h index f5faea3ecad8df..6d1ec74bb8e9e8 100644 --- a/diff.h +++ b/diff.h @@ -606,8 +606,12 @@ void diffcore_fix_diff_index(void); " try 
unchanged files as candidate for copy detection.\n" \ " -l limit rename attempts up to paths.\n" \ " -O reorder diffs according to the .\n" \ -" -G find differences where patch contains the specified regex.\n" \ -" -S find filepair who differ in the number of occurrences of string.\n" \ +" -G\n" \ +" --patch-grep=\n" \ +" find differences where patch contains the regex.\n" \ +" -S\n" \ +" --patch-modifies=\n" \ +" find filepair who differ in the number of occurrences of string.\n" \ " --pickaxe-grep\n" \ " treat as a regex in the -S argument.\n" \ " --pickaxe-all\n" \ diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index ed70cdfc4ac538..ab14b2412f878e 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -60,24 +60,48 @@ test_expect_success 'usage' ' test_expect_code 129 git log -S 2>err && test_grep "switch.*requires a value" err && + test_expect_code 129 git log --patch-modifies 2>err && + test_grep "option.*requires a value" err && + test_expect_code 129 git log -G 2>err && test_grep "switch.*requires a value" err && + test_expect_code 129 git log --patch-grep 2>err && + test_grep "option.*requires a value" err && + test_expect_code 128 git log -Gregex -Sstring 2>err && grep "cannot be used together" err && + test_expect_code 128 git log -Gregex --patch-modifies string 2>err && + grep "cannot be used together" err && + + test_expect_code 128 git log --patch-grep regex -Sstring 2>err && + grep "cannot be used together" err && + + test_expect_code 128 git log --patch-grep regex --patch-modifies string 2>err && + grep "cannot be used together" err && + test_expect_code 128 git log -Gregex --find-object=HEAD 2>err && grep "cannot be used together" err && + test_expect_code 128 git log --patch-grep regex --find-object=HEAD 2>err && + grep "cannot be used together" err && + test_expect_code 128 git log -Sstring --find-object=HEAD 2>err && grep "cannot be used together" err && + test_expect_code 128 git log --patch-modifies string 
--find-object=HEAD 2>err && + grep "cannot be used together" err && + test_expect_code 128 git log --pickaxe-all --find-object=HEAD 2>err && grep "cannot be used together" err ' test_expect_success 'usage: --pickaxe-regex' ' test_expect_code 128 git log -Gregex --pickaxe-regex 2>err && + grep "cannot be used together" err && + + test_expect_code 128 git log --patch-grep regex --pickaxe-regex 2>err && grep "cannot be used together" err ' @@ -89,7 +113,13 @@ test_expect_success 'usage: --no-pickaxe-regex' ' test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual && test_cmp expect actual && + test_expect_code 128 git log --patch-modifies string --no-pickaxe-regex 2>actual && + test_cmp expect actual && + test_expect_code 128 git log -Gregex --no-pickaxe-regex 2>err && + test_cmp expect actual && + + test_expect_code 128 git log --patch-grep regex --no-pickaxe-regex 2>err && test_cmp expect actual ' @@ -104,9 +134,13 @@ test_log_icase expect_second --author person test_log_icase expect_nomatch --author spreon test_log expect_nomatch -G picked +test_log expect_nomatch --patch-grep picked test_log expect_second -G Picked +test_log expect_second --patch-grep Picked test_log_icase expect_nomatch -G pickle +test_log_icase expect_nomatch --patch-grep pickle test_log_icase expect_second -G picked +test_log_icase expect_second --patch-grep picked test_expect_success 'log -G --textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && @@ -122,14 +156,22 @@ test_expect_success 'log -G --no-textconv (missing textconv tool)' ' ' test_log expect_nomatch -S picked +test_log expect_nomatch --patch-modifies picked test_log expect_second -S Picked +test_log expect_second --patch-modifies Picked test_log_icase expect_second -S picked +test_log_icase expect_second --patch-modifies picked test_log_icase expect_nomatch -S pickle +test_log_icase expect_nomatch --patch-modifies pickle test_log expect_nomatch -S p.cked --pickaxe-regex +test_log expect_nomatch 
--patch-modifies p.cked --pickaxe-regex test_log expect_second -S P.cked --pickaxe-regex +test_log expect_second --patch-modifies P.cked --pickaxe-regex test_log_icase expect_second -S p.cked --pickaxe-regex +test_log_icase expect_second --patch-modifies p.cked --pickaxe-regex test_log_icase expect_nomatch -S p.ckle --pickaxe-regex +test_log_icase expect_nomatch --patch-modifies p.ckle --pickaxe-regex test_expect_success 'log -S --textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && From 16d9c40febe65a9b095fa22aebf1c19011967e28 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:51 -0800 Subject: [PATCH 08/58] completion: Support --patch-{grep,modifies} Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 413911be3be313..daf3354f5426d7 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1900,6 +1900,7 @@ __git_diff_common_options="--stat --numstat --shortstat --summary --output= --output-indicator-context= --output-indicator-new= --output-indicator-old= --ws-error-highlight= + --patch-grep= --patch-modifies= --pickaxe-all --pickaxe-regex --patch-with-raw " @@ -2216,7 +2217,7 @@ __git_complete_log_opts () __git_complete_symbol --cur="${cur#:}" --sfx=":" return ;; - -G,*|-S,*) + -G,*|--patch-grep,*|-S,*|--patch-modifies,*) __git_complete_symbol return ;; @@ -2239,6 +2240,14 @@ __git_complete_log_opts () __gitcomp "$__git_diff_algorithms" "" "${cur##--diff-algorithm=}" return ;; + --patch-grep=*) + __git_complete_symbol --pfx="--patch-grep=" --cur="${cur#--patch-grep=}" + return + ;; + --patch-modifies=*) + __git_complete_symbol --pfx="--patch-modifies=" --cur="${cur#--patch-modifies=}" + return + ;; --submodule=*) __gitcomp "$__git_diff_submodule_formats" "" "${cur##--submodule=}" return From 
07a176cdab7a95f53f9df837b8f3a20583b3772d Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:52 -0800 Subject: [PATCH 09/58] diff: test: Use --patch-{grep,modifies} over -G/-S Long argument names are easier to read, compared to short ones. So while short arguments are great when you want to type a command quickly, tests are more readable if we use long argument names. There are still tests that verify that both short and long arguments work interchangeably when parsing the arguments. Tests where the focus is not on the argument names are updated to use long argument names. Signed-off-by: Junio C Hamano --- t/t4062-diff-pickaxe.sh | 8 +++--- t/t4209-log-pickaxe.sh | 62 ++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/t/t4062-diff-pickaxe.sh b/t/t4062-diff-pickaxe.sh index 8ad3d799579230..805e0f4ab49fb8 100755 --- a/t/t4062-diff-pickaxe.sh +++ b/t/t4062-diff-pickaxe.sh @@ -16,13 +16,13 @@ test_expect_success setup ' ' # OpenBSD only supports up to 255 repetitions, so repeat twice for 64*64=4096. -test_expect_success '-G matches' ' - git diff --name-only -G "^(0{64}){64}$" HEAD^ >out && +test_expect_success '--patch-grep matches' ' + git diff --name-only --patch-grep "^(0{64}){64}$" HEAD^ >out && test 4096-zeroes.txt = "$(cat out)" ' -test_expect_success '-S --pickaxe-regex' ' - git diff --name-only -S0 --pickaxe-regex HEAD^ >out && +test_expect_success '--patch-modifies --pickaxe-regex' ' + git diff --name-only --patch-modifies 0 --pickaxe-regex HEAD^ >out && test 4096-zeroes.txt = "$(cat out)" ' diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index ab14b2412f878e..5f4d6f75013675 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -1,6 +1,6 @@ #!/bin/sh -test_description='log --grep/--author/--regexp-ignore-case/-S/-G' +test_description='log --grep/--author/--regexp-ignore-case/--patch-{modifies,grep}' . 
./test-lib.sh @@ -142,15 +142,15 @@ test_log_icase expect_nomatch --patch-grep pickle test_log_icase expect_second -G picked test_log_icase expect_second --patch-grep picked -test_expect_success 'log -G --textconv (missing textconv tool)' ' +test_expect_success 'log --patch-grep --textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && - test_must_fail git -c diff.test.textconv=missing log -Gfoo && + test_must_fail git -c diff.test.textconv=missing log --patch-grep foo && rm .gitattributes ' -test_expect_success 'log -G --no-textconv (missing textconv tool)' ' +test_expect_success 'log --patch-grep --no-textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && - git -c diff.test.textconv=missing log -Gfoo --no-textconv >actual && + git -c diff.test.textconv=missing log --patch-grep foo --no-textconv >actual && test_cmp expect_nomatch actual && rm .gitattributes ' @@ -173,20 +173,20 @@ test_log_icase expect_second --patch-modifies p.cked --pickaxe-regex test_log_icase expect_nomatch -S p.ckle --pickaxe-regex test_log_icase expect_nomatch --patch-modifies p.ckle --pickaxe-regex -test_expect_success 'log -S --textconv (missing textconv tool)' ' +test_expect_success 'log --patch-modifies --textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && - test_must_fail git -c diff.test.textconv=missing log -Sfoo && + test_must_fail git -c diff.test.textconv=missing log --patch-modifies foo && rm .gitattributes ' -test_expect_success 'log -S --no-textconv (missing textconv tool)' ' +test_expect_success 'log --patch-modifies --no-textconv (missing textconv tool)' ' echo "* diff=test" >.gitattributes && - git -c diff.test.textconv=missing log -Sfoo --no-textconv >actual && + git -c diff.test.textconv=missing log --patch-modifies foo --no-textconv >actual && test_cmp expect_nomatch actual && rm .gitattributes ' -test_expect_success 'setup log -[GS] plain & regex' ' +test_expect_success 'setup log --patch{-modifies,-grep} plain & 
regex' ' test_create_repo GS-plain && test_commit -C GS-plain --append A data.txt "a" && test_commit -C GS-plain --append B data.txt "a a" && @@ -201,31 +201,31 @@ test_expect_success 'setup log -[GS] plain & regex' ' git -C GS-plain log >full-log ' -test_expect_success 'log -G trims diff new/old [-+]' ' - git -C GS-plain log -G"[+-]a" >log && +test_expect_success 'log --patch-grep trims diff new/old [-+]' ' + git -C GS-plain log --patch-grep "[+-]a" >log && test_must_be_empty log && - git -C GS-plain log -G"^a" >log && + git -C GS-plain log --patch-grep "^a" >log && test_cmp log A-to-B-then-E-log ' -test_expect_success 'log -S is not a regex, but -S --pickaxe-regex is' ' - git -C GS-plain log -S"a" >log && +test_expect_success 'log --patch-modifies is not a regex, but --patch-modifies --pickaxe-regex is' ' + git -C GS-plain log --patch-modifies "a" >log && test_cmp log A-to-B-then-E-log && - git -C GS-plain log -S"[a]" >log && + git -C GS-plain log --patch-modifies "[a]" >log && test_must_be_empty log && - git -C GS-plain log -S"[a]" --pickaxe-regex >log && + git -C GS-plain log --patch-modifies "[a]" --pickaxe-regex >log && test_cmp log A-to-B-then-E-log && - git -C GS-plain log -S"[b]" >log && + git -C GS-plain log --patch-modifies "[b]" >log && test_cmp log D-then-E-log && - git -C GS-plain log -S"[b]" --pickaxe-regex >log && + git -C GS-plain log --patch-modifies "[b]" --pickaxe-regex >log && test_cmp log C-to-D-then-E-log ' -test_expect_success 'setup log -[GS] binary & --text' ' +test_expect_success 'setup log --patch{-modifies,-grep} binary & --text' ' test_create_repo GS-bin-txt && test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" && test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" && @@ -233,36 +233,36 @@ test_expect_success 'setup log -[GS] binary & --text' ' git -C GS-bin-txt log >full-log ' -test_expect_success 'log -G ignores binary files' ' - git -C GS-bin-txt log -Ga >log && +test_expect_success 'log --patch-grep ignores 
binary files' ' + git -C GS-bin-txt log --patch-grep a >log && test_must_be_empty log ' -test_expect_success 'log -G looks into binary files with -a' ' - git -C GS-bin-txt log -a -Ga >log && +test_expect_success 'log --patch-grep looks into binary files with -a' ' + git -C GS-bin-txt log -a --patch-grep a >log && test_cmp log full-log ' -test_expect_success 'log -G looks into binary files with textconv filter' ' +test_expect_success 'log --patch-grep looks into binary files with textconv filter' ' test_when_finished "rm GS-bin-txt/.gitattributes" && ( cd GS-bin-txt && echo "* diff=bin" >.gitattributes && - git -c diff.bin.textconv=cat log -Ga >../log + git -c diff.bin.textconv=cat log --patch-grep a >../log ) && test_cmp log full-log ' -test_expect_success 'log -S looks into binary files' ' - git -C GS-bin-txt log -Sa >log && +test_expect_success 'log --patch-modifies looks into binary files' ' + git -C GS-bin-txt log --patch-modifies a >log && test_cmp log full-log ' -test_expect_success 'log -S --pickaxe-regex looks into binary files' ' - git -C GS-bin-txt log --pickaxe-regex -Sa >log && +test_expect_success 'log --patch-modifies --pickaxe-regex looks into binary files' ' + git -C GS-bin-txt log --pickaxe-regex --patch-modifies a >log && test_cmp log full-log && - git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log && + git -C GS-bin-txt log --pickaxe-regex --patch-modifies "[a]" >log && test_cmp log full-log ' From 80b09099f88bb73430e785a6ff4272a616a129e3 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:53 -0800 Subject: [PATCH 10/58] diff: --pickaxe-{all,regex} help: Add --patch-{grep,modifies} For less experienced users --patch-{grep,modifies} should be easier to understand than just -S or -G. By mentioning the long argument names in the help messages we save those users from having to search the list of options for an explanation of what -S or -G stand for. 
Signed-off-by: Junio C Hamano --- diff.c | 4 ++-- diff.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/diff.c b/diff.c index 5986728026be34..69d8d96e0ba920 100644 --- a/diff.c +++ b/diff.c @@ -5871,10 +5871,10 @@ struct option *add_diff_options(const struct option *opts, N_("look for differences where a patch contains the specified regex"), 0, diff_opt_pickaxe_regex), OPT_BIT_F(0, "pickaxe-all", &options->pickaxe_opts, - N_("show all changes in the changeset with -S or -G"), + N_("show all changes in the changeset with -S/--patch-modifies or -G/--patch-grep"), DIFF_PICKAXE_ALL, PARSE_OPT_NONEG), OPT_BIT_F(0, "pickaxe-regex", &options->pickaxe_opts, - N_("treat in -S as extended POSIX regular expression"), + N_("treat in -S/--patch-modifies as extended POSIX regular expression"), DIFF_PICKAXE_REGEX, PARSE_OPT_NONEG), OPT_FILENAME('O', NULL, &options->orderfile, N_("control the order in which files appear in the output")), diff --git a/diff.h b/diff.h index 6d1ec74bb8e9e8..3d8608a569f563 100644 --- a/diff.h +++ b/diff.h @@ -613,9 +613,9 @@ void diffcore_fix_diff_index(void); " --patch-modifies=\n" \ " find filepair who differ in the number of occurrences of string.\n" \ " --pickaxe-grep\n" \ -" treat as a regex in the -S argument.\n" \ +" treat as a regex in the -S/--patch-modifies argument.\n" \ " --pickaxe-all\n" \ -" show all files diff when -G or -S is used and hit is found.\n" \ +" show all files diff for -G/--patch-grep and -S/--patch-modifies.\n" \ " -a --text treat all files as text.\n" int diff_queue_is_empty(struct diff_options *o); From ffdfcab88c2194b66b024a21e75b855d801653a9 Mon Sep 17 00:00:00 2001 From: Illia Bobyr Date: Tue, 11 Feb 2025 19:26:54 -0800 Subject: [PATCH 11/58] diff: docs: Use --patch-{grep,modifies} over -G/-S Long argument names are easier to read, compared to short ones. 
So while short arguments are great when you want to type a command quickly, the documentation readability is improved if we use long argument names. Note for reviewers: All changes are just a replacement of `-G` with `--patch-grep` and `-S` with `--patch-modifies`. But as the text was reformatted to fit the same width in a few places it might look like there are more changes, if the diff is only line-wise and not word-wise. The only exception is the change in `gitdiffcore.adoc`, where I did rephrase a sentence. I've moved the introduction of the short versions of the `--patch-{grep,modifies}` into a subsequent paragraph. The reason is that I wanted to keep a note on the `-G` mnemonic, and it would be awkward to repeat the short definition twice over a span of two paragraphs. Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 34 ++++++++++----------- Documentation/git-blame.txt | 2 +- Documentation/gitdiffcore.txt | 55 +++++++++++++++++----------------- 3 files changed, 46 insertions(+), 45 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 07413df93785f6..c9f7c972d76cb4 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -658,8 +658,8 @@ renamed entries cannot appear if detection for those types is disabled. It is useful when you're looking for an exact block of code (like a struct), and want to know the history of that block since it first came into being: use the feature iteratively to feed the interesting -block in the preimage back into `-S`, and keep going until you get the -very first version of the block. +block in the preimage back into `--patch-modifies`, and keep going until +you get the very first version of the block. + Binary files are searched as well. @@ -668,9 +668,9 @@ Binary files are searched as well. Look for differences whose patch text contains added/removed lines that match __. 
+ -To illustrate the difference between `-S` `--pickaxe-regex` and -`-G`, consider a commit with the following diff in the same -file: +To illustrate the difference between `--patch-modifies= +--pickaxe-regex` and `--patch-grep=`, consider a commit with the +following diff in the same file: + ---- + return frotz(nitfol, two->ptr, 1, 0); @@ -678,9 +678,9 @@ file: - hit = frotz(nitfol, mf2.ptr, 1, 0); ---- + -While `git log -G"frotz\(nitfol"` will show this commit, `git log --S"frotz\(nitfol" --pickaxe-regex` will not (because the number of -occurrences of that string did not change). +While `git log --patch-grep="frotz\(nitfol"` will show this commit, `git +log --patch-modifies="frotz\(nitfol" --pickaxe-regex` will not (because the +number of occurrences of that string did not change). + Unless `--text` is supplied patches of binary files without a textconv filter will be ignored. @@ -689,22 +689,22 @@ See the 'pickaxe' entry in linkgit:gitdiffcore[7] for more information. `--find-object=`:: - Look for differences that change the number of occurrences of - the specified object. Similar to `-S`, just the argument is different - in that it doesn't search for a specific string but for a specific - object id. + Look for differences that change the number of occurrences of the + specified object. Similar to `--patch-modifies`, just the argument + is different in that it doesn't search for a specific string but + for a specific object id. + The object can be a blob or a submodule commit. It implies the `-t` option in `git-log` to also find trees. `--pickaxe-all`:: - When `-S` or `-G` finds a change, show all the changes in that - changeset, not just the files that contain the change - in __. + When `--patch-modifies` or `--patch-grep` finds a change, show all + the changes in that changeset, not just the files that contain the + change in __. `--pickaxe-regex`:: - Treat the __ given to `-S` as an extended POSIX regular - expression to match. 
+ Treat the __ given to `--patch-modifies` as an extended + POSIX regular expression to match. endif::git-format-patch[] diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt index b1d7fb539d0216..0f21d3801837ce 100644 --- a/Documentation/git-blame.txt +++ b/Documentation/git-blame.txt @@ -41,7 +41,7 @@ a text string in the diff. A small example of the pickaxe interface that searches for `blame_usage`: ----------------------------------------------------------------------------- -$ git log --pretty=oneline -S'blame_usage' +$ git log --pretty=oneline --patch-modifies='blame_usage' 5040f17eba15504bad66b14a645bddd9b015ebb7 blame -S ea4c7f9bf69e781dd0cd88d2bccb2bf5cc15c9a7 git-blame: Make the output ----------------------------------------------------------------------------- diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt index e934b97a75cf6c..e7f98e16125afa 100644 --- a/Documentation/gitdiffcore.txt +++ b/Documentation/gitdiffcore.txt @@ -245,33 +245,34 @@ diffcore-pickaxe: For Detecting Addition/Deletion of Specified String This transformation limits the set of filepairs to those that change specified strings between the preimage and the postimage in a certain -way. `--patch-modifies=` (`-S` for short) and -`--patch-grep=` (`-G` for short) are used to specify -different ways these strings are sought. - -`-S` detects filepairs whose preimage and postimage -have different number of occurrences of the specified __. -By definition, it will not detect in-file moves. Also, when a -changeset moves a file wholesale without affecting the interesting -string, diffcore-rename kicks in as usual, and `-S` omits the filepair -(since the number of occurrences of that string didn't change in that -rename-detected filepair). When used with `--pickaxe-regex`, treat -the __ as an extended POSIX regular expression to match, -instead of a literal string. 
- -`-G` (mnemonic: grep) detects filepairs whose textual diff has -an added or a deleted line that matches the given __. This -means that it will detect in-file (or what rename-detection considers -the same file) moves, which is noise. The implementation runs diff -twice and greps, and this can be quite expensive. To speed things up, -binary files without textconv filters will be ignored. - -When `-S` or `-G` are used without `--pickaxe-all`, only filepairs -that match their respective criterion are kept in the output. When -`--pickaxe-all` is used, if even one filepair matches their respective -criterion in a changeset, the entire changeset is kept. This behavior -is designed to make reviewing changes in the context of the whole -changeset easier. +way. `--patch-modifies=` and `--patch-grep=` are used +to specify different ways these strings are sought. + +`--patch-modifies=` (`-S` for short) detects filepairs +whose preimage and postimage have different number of occurrences of +the specified __. By definition, it will not detect in-file +moves. Also, when a changeset moves a file wholesale without +affecting the interesting string, diffcore-rename kicks in as usual, +and `--patch-modifies` omits the filepair (since the number of +occurrences of that string didn't change in that rename-detected +filepair). When used with `--pickaxe-regex`, treat the __ as +an extended POSIX regular expression to match, instead of a literal +string. + +`--patch-grep=` (`-G` for short, mnemonic: grep) detects +filepairs whose textual diff has an added or a deleted line that +matches the given regular expression. This means that it will detect +in-file (or what rename-detection considers the same file) moves, +which is noise. The implementation runs diff twice and greps, and +this can be quite expensive. To speed things up, binary files without +textconv filters will be ignored. 
+ +When `--patch-modifies` or `--patch-grep` are used without +`--pickaxe-all`, only filepairs that match their respective criterion +are kept in the output. When `--pickaxe-all` is used, if even one +filepair matches their respective criterion in a changeset, the entire +changeset is kept. This behavior is designed to make reviewing +changes in the context of the whole changeset easier. diffcore-order: For Sorting the Output Based on Filenames --------------------------------------------------------- From 4e2609601248b96297febd6e39a59f45c0d0013c Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Fri, 21 Feb 2025 14:04:42 -0500 Subject: [PATCH 12/58] git-compat-util: add strtoul_ul() with error handling We already have strtoul_ui() and similar functions that provide proper error handling using strtoul from the standard library. However, there isn't currently a variant that returns an unsigned long. This commit introduces strtoul_ul() to address this gap, enabling the return of an unsigned long with proper error handling. Signed-off-by: Junio C Hamano --- git-compat-util.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index e123288e8f1393..0e9a43351ad6a2 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1353,6 +1353,26 @@ static inline int strtoul_ui(char const *s, int base, unsigned int *result) return 0; } +/* + * Convert a string to an unsigned long using the standard library's strtoul, + * with additional error handling to ensure robustness. 
+ */ +static inline int strtoul_ul(char const *s, int base, unsigned long *result) +{ + unsigned long ul; + char *p; + + errno = 0; + /* negative values would be accepted by strtoul */ + if (strchr(s, '-')) + return -1; + ul = strtoul(s, &p, base); + if (errno || *p || p == s ) + return -1; + *result = ul; + return 0; +} + static inline int strtol_i(char const *s, int base, int *result) { long ul; From 219877e46581872adb47141cf85371f174f484ac Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Fri, 21 Feb 2025 14:04:43 -0500 Subject: [PATCH 13/58] cat-file: add declaration of variable i inside its for loop Some code used in this series declares variable i and only uses it in a for loop, not in any other logic outside the loop. Change the declaration of i to be inside the for loop for readability. While at it, we also change its type from "int" to "size_t" where the latter makes more sense. Helped-by: Christian Couder Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 11 +++-------- fetch-pack.c | 3 +-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b13561cf73b11b..69ea642dc66cec 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -676,12 +676,10 @@ static void dispatch_calls(struct batch_options *opt, struct queued_cmd *cmd, int nr) { - int i; - if (!opt->buffer_output) die(_("flush is only for --buffer mode")); - for (i = 0; i < nr; i++) + for (size_t i = 0; i < nr; i++) cmd[i].fn(opt, cmd[i].line, output, data); fflush(stdout); @@ -689,9 +687,7 @@ static void dispatch_calls(struct batch_options *opt, static void free_cmds(struct queued_cmd *cmd, size_t *nr) { - size_t i; - - for (i = 0; i < *nr; i++) + for (size_t i = 0; i < *nr; i++) FREE_AND_NULL(cmd[i].line); *nr = 0; @@ -717,7 +713,6 @@ static void batch_objects_command(struct batch_options *opt, size_t alloc = 0, nr = 0; while (strbuf_getdelim_strip_crlf(&input, stdin, opt->input_delim) != EOF) { - int i; const struct 
parse_cmd *cmd = NULL; const char *p = NULL, *cmd_end; struct queued_cmd call = {0}; @@ -727,7 +722,7 @@ static void batch_objects_command(struct batch_options *opt, if (isspace(*input.buf)) die(_("whitespace before command: '%s'"), input.buf); - for (i = 0; i < ARRAY_SIZE(commands); i++) { + for (size_t i = 0; i < ARRAY_SIZE(commands); i++) { if (!skip_prefix(input.buf, commands[i].name, &cmd_end)) continue; diff --git a/fetch-pack.c b/fetch-pack.c index 1ed5e11dd56857..71fb2ca05438fd 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1331,9 +1331,8 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, if (advertise_sid && server_supports_v2("session-id")) packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); if (server_options && server_options->nr) { - int i; ensure_server_supports_v2("server-option"); - for (i = 0; i < server_options->nr; i++) + for (size_t i = 0; i < server_options->nr; i++) packet_buf_write(req_buf, "server-option=%s", server_options->items[i].string); } From d138188f984f79202e08697c5ada0c394d3ad697 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Fri, 21 Feb 2025 14:04:44 -0500 Subject: [PATCH 14/58] t1006: split test utility functions into new "lib-cat-file.sh" This refactor extracts utility functions from the cat-file's test script "t1006-cat-file.sh" into a new "lib-cat-file.sh" dedicated library file. The goal is to improve code reuse and readability, enabling future tests to leverage these utilities without duplicating code. Signed-off-by: Junio C Hamano --- t/lib-cat-file.sh | 16 ++++++++++++++++ t/t1006-cat-file.sh | 13 +------------ 2 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 t/lib-cat-file.sh diff --git a/t/lib-cat-file.sh b/t/lib-cat-file.sh new file mode 100644 index 00000000000000..44af232d74113e --- /dev/null +++ b/t/lib-cat-file.sh @@ -0,0 +1,16 @@ +# Library of git-cat-file related test functions. + +# Print a string without a trailing newline. 
+echo_without_newline () { + printf '%s' "$*" +} + +# Print a string without newlines and replace them with a NULL character (\0). +echo_without_newline_nul () { + echo_without_newline "$@" | tr '\n' '\0' +} + +# Calculate the length of a string. +strlen () { + echo_without_newline "$1" | wc -c | sed -e 's/^ *//' +} diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 398865d6ebe9c6..1c27c10c6f7ef2 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -3,6 +3,7 @@ test_description='git cat-file' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-cat-file.sh test_cmdmode_usage () { test_expect_code 129 "$@" 2>err && @@ -98,18 +99,6 @@ do ' done -echo_without_newline () { - printf '%s' "$*" -} - -echo_without_newline_nul () { - echo_without_newline "$@" | tr '\n' '\0' -} - -strlen () { - echo_without_newline "$1" | wc -c | sed -e 's/^ *//' -} - run_tests () { type=$1 oid=$2 From 4278fbdd486c6c3c84a2612285ce8b12dc898718 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Fri, 21 Feb 2025 14:04:45 -0500 Subject: [PATCH 15/58] fetch-pack: refactor packet writing Refactor write_fetch_command_and_capabilities() to a more general-purpose function, write_command_and_capabilities(), enabling it to serve both fetch and additional commands. In this context, "command" refers to the "operations" supported by Git's wire protocol https://git-scm.com/docs/protocol-v2, such as a Git subcommand (e.g., git-fetch(1)) or a server-side operation like "object-info" as implemented in commit a2ba162c (object-info: support for retrieving object info, 2021-04-20). Furthermore, write_command_and_capabilities() is moved to connect.c, making it accessible to additional commands in the future. To move write_command_and_capabilities() to connect.c, we need to adjust how `advertise_sid` is managed. Previously, in fetch_pack.c, `advertise_sid` was a static variable, modified using git_config_get_bool(). 
In connect.c, we now initialize `advertise_sid` at the beginning by directly using git_config_get_bool(). This change is safe because: In the original fetch-pack.c code, there are only two places that write `advertise_sid` : 1. In function do_fetch_pack: if (!server_supports("session-id")) advertise_sid = 0; 2. In function fetch_pack_config(): git_config_get_bool("transfer.advertisesid", &advertise_sid); About 1, since do_fetch_pack() is only relevant for protocol v1, this assignment can be ignored in our refactor, as write_command_and_capabilities() is only used in protocol v2. About 2, git_config_get_bool() is from config.h and it is an out-of-box dependency of connect.c, so we can reuse it directly. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- connect.c | 34 ++++++++++++++++++++++++++++++++++ connect.h | 8 ++++++++ fetch-pack.c | 35 ++--------------------------------- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/connect.c b/connect.c index 91f399001449c8..6647b4a4b6d45d 100644 --- a/connect.c +++ b/connect.c @@ -688,6 +688,40 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options) +{ + const char *hash_name; + int advertise_sid; + + git_config_get_bool("transfer.advertisesid", &advertise_sid); + + ensure_server_supports_v2(command); + packet_buf_write(req_buf, "command=%s", command); + if (server_supports_v2("agent")) + packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); + if (advertise_sid && server_supports_v2("session-id")) + packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); + if (server_options && server_options->nr) { + ensure_server_supports_v2("server-option"); + for (size_t i = 0; i < server_options->nr; i++) + packet_buf_write(req_buf, "server-option=%s", + 
server_options->items[i].string); + } + + if (server_feature_v2("object-format", &hash_name)) { + const int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo_by_ptr(the_hash_algo) != hash_algo) + die(_("mismatched algorithms: client %s; server %s"), + the_hash_algo->name, hash_name); + packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + die(_("the server does not support algorithm '%s'"), + the_hash_algo->name); + } + packet_buf_delim(req_buf); +} + enum protocol { PROTO_LOCAL = 1, PROTO_FILE, diff --git a/connect.h b/connect.h index 1645126c17f889..d904c73a856b11 100644 --- a/connect.h +++ b/connect.h @@ -30,4 +30,12 @@ void check_stateless_delimiter(int stateless_rpc, struct packet_reader *reader, const char *error); +/* + * Writes a command along with the requested + * server capabilities/features into a request buffer. + */ +struct string_list; +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index 71fb2ca05438fd..19b4a092eadab6 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1319,37 +1319,6 @@ static int add_haves(struct fetch_negotiator *negotiator, return haves_added; } -static void write_fetch_command_and_capabilities(struct strbuf *req_buf, - const struct string_list *server_options) -{ - const char *hash_name; - - ensure_server_supports_v2("fetch"); - packet_buf_write(req_buf, "command=fetch"); - if (server_supports_v2("agent")) - packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); - if (advertise_sid && server_supports_v2("session-id")) - packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); - if (server_options && server_options->nr) { - ensure_server_supports_v2("server-option"); - for (size_t i = 0; i < server_options->nr; i++) - packet_buf_write(req_buf, "server-option=%s", - server_options->items[i].string); 
- } - - if (server_feature_v2("object-format", &hash_name)) { - int hash_algo = hash_algo_by_name(hash_name); - if (hash_algo_by_ptr(the_hash_algo) != hash_algo) - die(_("mismatched algorithms: client %s; server %s"), - the_hash_algo->name, hash_name); - packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); - } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { - die(_("the server does not support algorithm '%s'"), - the_hash_algo->name); - } - packet_buf_delim(req_buf); -} - static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, @@ -1360,7 +1329,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, int done_sent = 0; struct strbuf req_buf = STRBUF_INIT; - write_fetch_command_and_capabilities(&req_buf, args->server_options); + write_command_and_capabilities(&req_buf, "fetch", args->server_options); if (args->use_thin_pack) packet_buf_write(&req_buf, "thin-pack"); @@ -2188,7 +2157,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, the_repository, "%d", negotiation_round); strbuf_reset(&req_buf); - write_fetch_command_and_capabilities(&req_buf, server_options); + write_command_and_capabilities(&req_buf, "fetch", server_options); packet_buf_write(&req_buf, "wait-for-done"); From 9e471b69619848d48b5887171b7195ee649b879a Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Fri, 21 Feb 2025 14:04:46 -0500 Subject: [PATCH 16/58] fetch-pack: move fetch initialization There are some variables initialized at the start of the do_fetch_pack_v2() state machine. Currently, they are initialized in FETCH_CHECK_LOCAL, which is the initial state set at the beginning of the function. However, a subsequent patch will allow for another initial state, while still requiring these initialized variables. Move the initialization to be before the state machine, so that they are set regardless of the initial state. 
Note that there is no change in behavior, because we're moving code from the beginning of the first state to just before the execution of the state machine. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- fetch-pack.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 19b4a092eadab6..35dccea073dd9c 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1650,18 +1650,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, reader.me = "fetch-pack"; } + /* v2 supports these by default */ + allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; + use_sideband = 2; + if (args->depth > 0 || args->deepen_since || args->deepen_not) + args->deepen = 1; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); - /* v2 supports these by default */ - allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; - use_sideband = 2; - if (args->depth > 0 || args->deepen_since || args->deepen_not) - args->deepen = 1; - /* Filter 'ref' by 'sought' and those that aren't local */ mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); From 6f4f191fd104d28584edf39c3394f11b8b8c72e3 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Fri, 21 Feb 2025 14:04:47 -0500 Subject: [PATCH 17/58] serve: advertise object-info feature In order for a client to know what object-info components a server can provide, advertise supported object-info features. This will allow a client to decide whether to query the server for object-info or fetch as a fallback. 
Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- serve.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/serve.c b/serve.c index f6dfe34a2bee6b..92fd26fd0a8bdd 100644 --- a/serve.c +++ b/serve.c @@ -68,7 +68,7 @@ static void session_id_receive(struct repository *r UNUSED, trace2_data_string("transfer", NULL, "client-sid", client_sid); } -static int object_info_advertise(struct repository *r, struct strbuf *value UNUSED) +static int object_info_advertise(struct repository *r, struct strbuf *value) { if (advertise_object_info == -1 && repo_config_get_bool(r, "transfer.advertiseobjectinfo", @@ -76,6 +76,8 @@ static int object_info_advertise(struct repository *r, struct strbuf *value UNUS /* disabled by default */ advertise_object_info = 0; } + if (value && advertise_object_info) + strbuf_addstr(value, "size"); return advertise_object_info; } From c2f2af1d09f6c7f50a40ed51e6d9f834b73f5533 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Fri, 21 Feb 2025 14:04:48 -0500 Subject: [PATCH 18/58] transport: add client support for object-info Sometimes, it is beneficial to retrieve information about an object without downloading it entirely. The server-side logic for this functionality was implemented in commit "a2ba162cda (object-info: support for retrieving object info, 2021-04-20)." And the wire format is documented at https://git-scm.com/docs/protocol-v2#_object_info. This commit introduces client functions to interact with the server. Currently, the client supports requesting a list of object IDs with the 'size' feature from a v2 server. If the server does not advertise this feature (i.e., transfer.advertiseobjectinfo is set to false), the client will return an error and exit. Notice that the entire request is written into req_buf before being sent to the remote. This approach follows the pattern used in the `send_fetch_request()` logic within fetch-pack.c. 
Streaming the request is not addressed in this patch. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- Makefile | 1 + fetch-object-info.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ fetch-object-info.h | 22 ++++++++++++ fetch-pack.c | 3 ++ fetch-pack.h | 2 ++ transport-helper.c | 11 ++++-- transport.c | 28 ++++++++++++++- transport.h | 11 ++++++ 8 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 fetch-object-info.c create mode 100644 fetch-object-info.h diff --git a/Makefile b/Makefile index bcf5ed3f85fafc..bd6786a3d986bf 100644 --- a/Makefile +++ b/Makefile @@ -1030,6 +1030,7 @@ LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec-cmd.o LIB_OBJS += fetch-negotiator.o LIB_OBJS += fetch-pack.o +LIB_OBJS += fetch-object-info.o LIB_OBJS += fmt-merge-msg.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o diff --git a/fetch-object-info.c b/fetch-object-info.c new file mode 100644 index 00000000000000..b279e06dc8c385 --- /dev/null +++ b/fetch-object-info.c @@ -0,0 +1,85 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "hex.h" +#include "pkt-line.h" +#include "connect.h" +#include "oid-array.h" +#include "object-store-ll.h" +#include "fetch-object-info.h" +#include "string-list.h" + +/* Sends git-cat-file object-info command and its arguments into the request buffer. 
*/ +static void send_object_info_request(const int fd_out, struct object_info_args *args) +{ + struct strbuf req_buf = STRBUF_INIT; + + write_command_and_capabilities(&req_buf, "object-info", args->server_options); + + if (unsorted_string_list_has_string(args->object_info_options, "size")) + packet_buf_write(&req_buf, "size"); + + if (args->oids) + for (size_t i = 0; i < args->oids->nr; i++) + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); + + packet_buf_flush(&req_buf); + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) + die_errno(_("unable to write request to remote")); + + strbuf_release(&req_buf); +} + +int fetch_object_info(const enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + const int stateless_rpc, const int fd_out) +{ + int size_index = -1; + + switch (version) { + case protocol_v2: + if (!server_supports_v2("object-info")) + die(_("object-info capability is not enabled on the server")); + send_object_info_request(fd_out, args); + break; + case protocol_v1: + case protocol_v0: + die(_("unsupported protocol version. 
expected v2")); + case protocol_unknown_version: + BUG("unknown protocol version"); + } + + for (size_t i = 0; i < args->object_info_options->nr; i++) { + if (packet_reader_read(reader) != PACKET_READ_NORMAL) { + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); + return -1; + } + if (!string_list_has_string(args->object_info_options, reader->line)) + return -1; + if (!strcmp(reader->line, "size")) { + size_index = i; + for (size_t j = 0; j < args->oids->nr; j++) + object_info_data[j].sizep = xcalloc(1, sizeof(*object_info_data[j].sizep)); + } + } + + for (size_t i = 0; packet_reader_read(reader) == PACKET_READ_NORMAL && i < args->oids->nr; i++){ + struct string_list object_info_values = STRING_LIST_INIT_DUP; + + string_list_split(&object_info_values, reader->line, ' ', -1); + if (0 <= size_index) { + if (!strcmp(object_info_values.items[1 + size_index].string, "")) + die("object-info: not our ref %s", + object_info_values.items[0].string); + + if (strtoul_ul(object_info_values.items[1 + size_index].string, 10, object_info_data[i].sizep)) + die("object-info: ref %s has invalid size %s", + object_info_values.items[0].string, + object_info_values.items[1 + size_index].string); + } + + string_list_clear(&object_info_values, 0); + } + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); + + return 0; +} diff --git a/fetch-object-info.h b/fetch-object-info.h new file mode 100644 index 00000000000000..6184d04d7234c9 --- /dev/null +++ b/fetch-object-info.h @@ -0,0 +1,22 @@ +#ifndef FETCH_OBJECT_INFO_H +#define FETCH_OBJECT_INFO_H + +#include "pkt-line.h" +#include "protocol.h" +#include "object-store-ll.h" + +struct object_info_args { + struct string_list *object_info_options; + const struct string_list *server_options; + struct oid_array *oids; +}; + +/* + * Sends git-cat-file object-info command into the request buf and read the + * results from packets. 
+ */ +int fetch_object_info(enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + int stateless_rpc, int fd_out); + +#endif /* FETCH_OBJECT_INFO_H */ diff --git a/fetch-pack.c b/fetch-pack.c index 35dccea073dd9c..92e8a7291ceecc 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1656,6 +1656,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (args->depth > 0 || args->deepen_since || args->deepen_not) args->deepen = 1; + if (args->object_info) + state = FETCH_SEND_REQUEST; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: diff --git a/fetch-pack.h b/fetch-pack.h index 9d3470366f85ec..119d3369f1047b 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -16,6 +16,7 @@ struct fetch_pack_args { const struct string_list *deepen_not; struct list_objects_filter_options filter_options; const struct string_list *server_options; + struct object_info *object_info_data; /* * If not NULL, during packfile negotiation, fetch-pack will send "have" @@ -42,6 +43,7 @@ struct fetch_pack_args { unsigned reject_shallow_remote:1; unsigned deepen:1; unsigned refetch:1; + unsigned object_info:1; /* * Indicate that the remote of this request is a promisor remote. The diff --git a/transport-helper.c b/transport-helper.c index d457b425501a74..9da1547b2c4eec 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -710,8 +710,8 @@ static int fetch_refs(struct transport *transport, /* * If we reach here, then the server, the client, and/or the transport - * helper does not support protocol v2. --negotiate-only requires - * protocol v2. + * helper does not support protocol v2. --negotiate-only and cat-file + * remote-object-info require protocol v2. 
*/ if (data->transport_options.acked_commits) { warning(_("--negotiate-only requires protocol v2")); @@ -727,6 +727,13 @@ static int fetch_refs(struct transport *transport, free_refs(dummy); } + /* fail the command explicitly to avoid further commands input. */ + if (transport->smart_options->object_info) + die(_("remote-object-info requires protocol v2")); + + if (!data->get_refs_list_called) + get_refs_list_using_list(transport, 0); + count = 0; for (i = 0; i < nr_heads; i++) if (!(to_fetch[i]->status & REF_STATUS_UPTODATE)) diff --git a/transport.c b/transport.c index 6c2801bcbd9d02..95be3771a6c4f5 100644 --- a/transport.c +++ b/transport.c @@ -9,6 +9,7 @@ #include "hook.h" #include "pkt-line.h" #include "fetch-pack.h" +#include "fetch-object-info.h" #include "remote.h" #include "connect.h" #include "send-pack.h" @@ -465,8 +466,33 @@ static int fetch_refs_via_pack(struct transport *transport, args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; + args.object_info = transport->smart_options->object_info; + + if (transport->smart_options->object_info + && transport->smart_options->object_info_oids->nr > 0) { + struct packet_reader reader; + struct object_info_args obj_info_args = { 0 }; + + obj_info_args.server_options = transport->server_options; + obj_info_args.oids = transport->smart_options->object_info_oids; + obj_info_args.object_info_options = transport->smart_options->object_info_options; + string_list_sort(obj_info_args.object_info_options); + + connect_setup(transport, 0); + packet_reader_init(&reader, data->fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_DIE_ON_ERR_PACKET); + + data->version = discover_version(&reader); + transport->hash_algo = reader.hash_algo; + + ret = fetch_object_info(data->version, &obj_info_args, &reader, + data->options.object_info_data, transport->stateless_rpc, + 
data->fd[1]); + goto cleanup; - if (!data->finished_handshake) { + } else if (!data->finished_handshake) { int i; int must_list_refs = 0; for (i = 0; i < nr_heads; i++) { diff --git a/transport.h b/transport.h index 44100fa9b7fdd6..e61e93186392e9 100644 --- a/transport.h +++ b/transport.h @@ -5,6 +5,7 @@ #include "remote.h" #include "list-objects-filter-options.h" #include "string-list.h" +#include "object-store.h" struct git_transport_options { unsigned thin : 1; @@ -30,6 +31,12 @@ struct git_transport_options { */ unsigned connectivity_checked:1; + /* + * Transport will attempt to retrieve only object-info. + * If object-info is not supported, the operation will error and exit. + */ + unsigned object_info : 1; + int depth; const char *deepen_since; const struct string_list *deepen_not; @@ -53,6 +60,10 @@ struct git_transport_options { * common commits to this oidset instead of fetching any packfiles. */ struct oidset *acked_commits; + + struct oid_array *object_info_oids; + struct object_info *object_info_data; + struct string_list *object_info_options; }; enum transport_family { From 802a553add113ff0fe93eb3690d88ac97aa91c99 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Fri, 21 Feb 2025 14:04:49 -0500 Subject: [PATCH 19/58] cat-file: add remote-object-info to batch-command Since the `info` command in `cat-file --batch-command` prints object info for a given object, it is natural to add another command in `cat-file --batch-command` to print object info for a given object from a remote. Add `remote-object-info` to `cat-file --batch-command`. While `info` takes object ids one at a time, this creates overhead when making requests to a server. So `remote-object-info` instead can take multiple object ids at once. 
The `cat-file --batch-command` command is generally implemented in the following manner: - Receive and parse input from user - Call respective function attached to command - Get object info, print object info In --buffer mode, this changes to: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue - Call respective function attached to command - Get object info, print object info Notice how the getting and printing of object info is accomplished one at a time. As described above, this creates a problem for making requests to a server. Therefore, `remote-object-info` is implemented in the following manner: - Receive and parse input from user If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Parse input, get object info, print object info And finally for --buffer mode `remote-object-info`: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue: If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Get object info, print object info To summarize, `remote-object-info` gets object info from the remote and then loops through the object info passed in, printing the info. In order for `remote-object-info` to avoid remote communication overhead in the non-buffer mode, the objects are passed in as such: remote-object-info <remote> <oid> <oid> ... <oid> rather than remote-object-info <remote> <oid> remote-object-info <remote> <oid> ...
remote-object-info <remote> <oid> Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 24 +- builtin/cat-file.c | 114 +++++ object-file.c | 11 + object-store-ll.h | 3 + t/t1017-cat-file-remote-object-info.sh | 664 +++++++++++++++++++++++++ 5 files changed, 812 insertions(+), 4 deletions(-) create mode 100755 t/t1017-cat-file-remote-object-info.sh diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index d5890ae3686f6b..4fbb3a077bc87f 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -149,6 +149,13 @@ info <object>:: Print object info for object reference `<object>`. This corresponds to the output of `--batch-check`. +remote-object-info <remote> <object>...:: + Print object info for object references `<object>` at specified + `<remote>` without downloading objects from the remote. + Raise an error when the `object-info` capability is not supported by the remote. + Raise an error when no object references are provided. + This command may be combined with `--buffer`. + flush:: Used with `--buffer` to execute all preceding commands that were issued since the beginning or since the last flush was issued. When `--buffer` @@ -290,7 +297,8 @@ newline. The available atoms are: The full hex representation of the object name. `objecttype`:: - The type of the object (the same as `cat-file -t` reports). + The type of the object (the same as `cat-file -t` reports). See + `CAVEATS` below. Not supported by `remote-object-info`. `objectsize`:: The size, in bytes, of the object (the same as `cat-file -s` @@ -298,13 +306,14 @@ newline. The available atoms are: `objectsize:disk`:: The size, in bytes, that the object takes up on disk. See the - note about on-disk sizes in the `CAVEATS` section below. + note about on-disk sizes in the `CAVEATS` section below. Not + supported by `remote-object-info`.
`deltabase`:: If the object is stored as a delta on-disk, this expands to the full hex representation of the delta base object name. Otherwise, expands to the null OID (all zeroes). See `CAVEATS` - below. + below. Not supported by `remote-object-info`. `rest`:: If this atom is used in the output string, input lines are split @@ -314,7 +323,10 @@ newline. The available atoms are: line) are output in place of the `%(rest)` atom. If no format is specified, the default format is `%(objectname) -%(objecttype) %(objectsize)`. +%(objecttype) %(objectsize)`, except for `remote-object-info` commands which use +`%(objectname) %(objectsize)` for now because "%(objecttype)" is not supported yet. +WARNING: When "%(objecttype)" is supported, the default format WILL be unified, so +DO NOT RELY on the current default format to stay the same!!! If `--batch` is specified, or if `--batch-command` is used with the `contents` command, the object information is followed by the object contents (consisting @@ -396,6 +408,10 @@ scripting purposes. CAVEATS ------- +Note that since %(objecttype), %(objectsize:disk) and %(deltabase) are +currently not supported by the `remote-object-info` command, we will raise +an error and exit when they appear in the format string. + Note that the sizes of objects on disk are reported accurately, but care should be taken in drawing conclusions about which refs or objects are responsible for disk usage. The size of a packed non-delta object may be diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 69ea642dc66cec..47fd2a777baf38 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -27,6 +27,18 @@ #include "promisor-remote.h" #include "mailmap.h" #include "write-or-die.h" +#include "alias.h" +#include "remote.h" +#include "transport.h" + +/* Maximum length for a remote URL. While no universal standard exists, + * 8K is assumed to be a reasonable limit. 
+ */ +#define MAX_REMOTE_URL_LEN (8*1024) +/* Maximum number of objects allowed in a single remote-object-info request. */ +#define MAX_ALLOWED_OBJ_LIMIT 10000 +/* Maximum input size permitted for the remote-object-info command. */ +#define MAX_REMOTE_OBJ_INFO_LINE (MAX_REMOTE_URL_LEN + MAX_ALLOWED_OBJ_LIMIT * (GIT_MAX_HEXSZ + 1)) enum batch_mode { BATCH_MODE_CONTENTS, @@ -48,6 +60,8 @@ struct batch_options { }; static const char *force_path; +static struct object_info *remote_object_info; +static struct oid_array object_info_oids = OID_ARRAY_INIT; static struct string_list mailmap = STRING_LIST_INIT_NODUP; static int use_mailmap; @@ -579,6 +593,61 @@ static void batch_one_object(const char *obj_name, object_context_release(&ctx); } +static int get_remote_info(struct batch_options *opt, int argc, const char **argv) +{ + int retval = 0; + struct remote *remote = NULL; + struct object_id oid; + struct string_list object_info_options = STRING_LIST_INIT_NODUP; + static struct transport *gtransport; + + /* + * Change the format to "%(objectname) %(objectsize)" when + * remote-object-info command is used. Once we start supporting objecttype + * the default format should change to DEFAULT_FORMAT. 
+ */ + if (!opt->format) + opt->format = "%(objectname) %(objectsize)"; + + remote = remote_get(argv[0]); + if (!remote) + die(_("must supply valid remote when using remote-object-info")); + + oid_array_clear(&object_info_oids); + for (size_t i = 1; i < argc; i++) { + if (get_oid_hex(argv[i], &oid)) + die(_("Not a valid object name %s"), argv[i]); + oid_array_append(&object_info_oids, &oid); + } + if (!object_info_oids.nr) + die(_("remote-object-info requires objects")); + + gtransport = transport_get(remote, NULL); + if (gtransport->smart_options) { + CALLOC_ARRAY(remote_object_info, object_info_oids.nr); + gtransport->smart_options->object_info = 1; + gtransport->smart_options->object_info_oids = &object_info_oids; + + /* 'objectsize' is the only option currently supported */ + if (!strstr(opt->format, "%(objectsize)")) + die(_("%s is currently not supported with remote-object-info"), opt->format); + + string_list_append(&object_info_options, "size"); + + if (object_info_options.nr > 0) { + gtransport->smart_options->object_info_options = &object_info_options; + gtransport->smart_options->object_info_data = remote_object_info; + retval = transport_fetch_refs(gtransport, NULL); + } + } else { + retval = -1; + } + + string_list_clear(&object_info_options, 0); + transport_disconnect(gtransport); + return retval; +} + struct object_cb_data { struct batch_options *opt; struct expand_data *expand; @@ -670,6 +739,50 @@ static void parse_cmd_info(struct batch_options *opt, batch_one_object(line, output, opt, data); } +static void parse_cmd_remote_object_info(struct batch_options *opt, + const char *line, struct strbuf *output, + struct expand_data *data) +{ + int count; + const char **argv; + char *line_to_split; + + if (strlen(line) >= MAX_REMOTE_OBJ_INFO_LINE) + die(_("remote-object-info command input overflow " + "(no more than %d objects are allowed)"), + MAX_ALLOWED_OBJ_LIMIT); + + line_to_split = xstrdup(line); + count = split_cmdline(line_to_split, &argv); + if 
(count < 0) + die(_("split remote-object-info command")); + + if (get_remote_info(opt, count, argv)) + goto cleanup; + + data->skip_object_info = 1; + for (size_t i = 0; i < object_info_oids.nr; i++) { + data->oid = object_info_oids.oid[i]; + if (remote_object_info[i].sizep) { + /* + * When reaching here, it means remote-object-info can retrieve + * information from server without downloading them. + */ + data->size = *remote_object_info[i].sizep; + opt->batch_mode = BATCH_MODE_INFO; + batch_object_write(argv[i+1], output, opt, data, NULL, 0); + } + } + data->skip_object_info = 0; + +cleanup: + for (size_t i = 0; i < object_info_oids.nr; i++) + free_object_info_contents(&remote_object_info[i]); + free(line_to_split); + free(argv); + free(remote_object_info); +} + static void dispatch_calls(struct batch_options *opt, struct strbuf *output, struct expand_data *data, @@ -701,6 +814,7 @@ static const struct parse_cmd { } commands[] = { { "contents", parse_cmd_contents, 1}, { "info", parse_cmd_info, 1}, + { "remote-object-info", parse_cmd_remote_object_info, 1}, { "flush", NULL, 0}, }; diff --git a/object-file.c b/object-file.c index 00c3a4b910f84c..836554437c85a3 100644 --- a/object-file.c +++ b/object-file.c @@ -3161,3 +3161,14 @@ int read_loose_object(const char *path, munmap(map, mapsize); return ret; } + +void free_object_info_contents(struct object_info *object_info) +{ + if (!object_info) + return; + free(object_info->typep); + free(object_info->sizep); + free(object_info->disk_sizep); + free(object_info->delta_base_oid); + free(object_info->type_name); +} diff --git a/object-store-ll.h b/object-store-ll.h index cd3bd5bd99f78c..20208e1d4fbe92 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -553,4 +553,7 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags); +/* Free pointers inside of object_info, but not object_info itself */ +void 
free_object_info_contents(struct object_info *object_info); + #endif /* OBJECT_STORE_LL_H */ diff --git a/t/t1017-cat-file-remote-object-info.sh b/t/t1017-cat-file-remote-object-info.sh new file mode 100755 index 00000000000000..fd6c63cdb95d5c --- /dev/null +++ b/t/t1017-cat-file-remote-object-info.sh @@ -0,0 +1,664 @@ +#!/bin/sh + +test_description='git cat-file --batch-command with remote-object-info command' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-cat-file.sh + +hello_content="Hello World" +hello_size=$(strlen "$hello_content") +hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) + +# This is how we get 13: +# 13 = + + + , where +# file mode is 100644, which is 6 characters; +# file name is hello, which is 5 characters +# a space is 1 character and a null is 1 character +tree_size=$(($(test_oid rawsz) + 13)) + +commit_message="Initial commit" + +# This is how we get 137: +# 137 = + + + +# + + +# + + +# + +# +# An easier way to calculate is: 1. use `git cat-file commit | wc -c`, +# to get 177, 2. 
then deduct 40 hex characters to get 137 +commit_size=$(($(test_oid hexsz) + 137)) + +tag_header_without_oid="type blob +tag hellotag +tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" +tag_header_without_timestamp="object $hello_oid +$tag_header_without_oid" +tag_description="This is a tag" +tag_content="$tag_header_without_timestamp 0 +0000 + +$tag_description" + +tag_oid=$(echo_without_newline "$tag_content" | git hash-object -t tag --stdin -w) +tag_size=$(strlen "$tag_content") + +set_transport_variables () { + hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) + tree_oid=$(git -C "$1" write-tree) + commit_oid=$(echo_without_newline "$commit_message" | git -C "$1" commit-tree $tree_oid) + tag_oid=$(echo_without_newline "$tag_content" | git -C "$1" hash-object -t tag --stdin -w) + tag_size=$(strlen "$tag_content") +} + +# This section tests --batch-command with remote-object-info command +# Since "%(objecttype)" is currently not supported by the command remote-object-info , +# the filters are set to "%(objectname) %(objectsize)" in some test cases. + +# Test --batch-command remote-object-info with 'git://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability +. 
"$TEST_DIRECTORY"/lib-git-daemon.sh +start_git_daemon --export-all --enable=receive-pack +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent + +test_expect_success 'create repo to be served by git-daemon' ' + git init "$daemon_parent" && + echo_without_newline "$hello_content" > $daemon_parent/hello && + git -C "$daemon_parent" update-index --add hello && + git -C "$daemon_parent" config transfer.advertiseobjectinfo true && + git clone "$GIT_DAEMON_URL/parent" -n "$daemon_parent/daemon_client_empty" +' + +test_expect_success 'batch-command remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid + remote-object-info "$GIT_DAEMON_URL/parent" $tree_oid + remote-object-info "$GIT_DAEMON_URL/parent" $commit_oid + remote-object-info "$GIT_DAEMON_URL/parent" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// multiple sha1 per line' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid 
$tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// default filter' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + GIT_TRACE_PACKET=1 git cat-file --batch-command >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid + remote-object-info "$GIT_DAEMON_URL/parent" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid + remote-object-info 
"$GIT_DAEMON_URL/parent" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info git:// default filter' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + batch_input="remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid +remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +# Test --batch-command remote-object-info with 'git://' and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info git:// fails when transfer.advertiseobjectinfo=false' ' + ( + git -C "$daemon_parent" config transfer.advertiseobjectinfo false && + set_transport_variables "$daemon_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "$daemon_parent" config transfer.advertiseobjectinfo true + + ) +' + +stop_git_daemon + +# Test --batch-command remote-object-info with 'file://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability +# shellcheck disable=SC2016 +test_expect_success 'create repo to be served by file:// transport' ' + git init server && + git -C server config protocol.version 2 && + git -C server config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > server/hello && + git -C server update-index --add hello && + git clone -n "file://$(pwd)/server" file_client_empty +' + +test_expect_success 'batch-command remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "file://${server_path}" 
$hello_oid + remote-object-info "file://${server_path}" $tree_oid + remote-object-info "file://${server_path}" $commit_oid + remote-object-info "file://${server_path}" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// multiple sha1 per line' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer 
>actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid + remote-object-info "file://${server_path}" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// default filter' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid + remote-object-info "file://${server_path}" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info file:// default filter' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + batch_input="remote-object-info \"file://${server_path}\" $hello_oid $tree_oid +remote-object-info \"file://${server_path}\" $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +# Test --batch-command remote-object-info with 'file://' and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info file:// fails when transfer.advertiseobjectinfo=false' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + git -C "${server_path}" config transfer.advertiseobjectinfo false && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "${server_path}" config transfer.advertiseobjectinfo true + ) +' + +# Test --batch-command remote-object-info with 'http://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'create repo to be served by http:// transport' ' + git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > $HTTPD_DOCUMENT_ROOT_PATH/http_parent/hello && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" update-index --add hello && + git clone "$HTTPD_URL/smart/http_parent" -n "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" +' + +test_expect_success 'batch-command remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" 
>>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// one line' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" 
>>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info http:// default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + batch_input="remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid +remote-object-info $HTTPD_URL/smart/http_parent $commit_oid $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + 
+test_expect_success 'remote-object-info fails on unspported filter option (objectsize:disk)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(objectsize:disk) is currently not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on unspported filter option (deltabase)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(deltabase) is currently not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on server with legacy protocol' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git -c protocol.version=0 cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "remote-object-info requires protocol v2" err + ) +' + +test_expect_success 'remote-object-info fails on server with legacy protocol with default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git -c protocol.version=0 cat-file --batch-command 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "remote-object-info requires protocol v2" err + ) +' + +test_expect_success 'remote-object-info fails on malformed OID' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd 
"$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + malformed_object_id="this_id_is_not_valid" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id + EOF + test_grep "Not a valid object name '$malformed_object_id'" err + ) +' + +test_expect_success 'remote-object-info fails on malformed OID with default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + malformed_object_id="this_id_is_not_valid" && + + test_must_fail git cat-file --batch-command 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id + EOF + test_grep "Not a valid object name '$malformed_object_id'" err + ) +' + +test_expect_success 'remote-object-info fails on missing OID' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo && + test_commit -C missing_oid_repo message1 c.txt && + cd missing_oid_repo && + + object_id=$(git rev-parse message1:c.txt) && + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $object_id + EOF + test_grep "object-info: not our ref $object_id" err + ) +' + +test_expect_success 'remote-object-info fails on not providing OID' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" + EOF + test_grep "remote-object-info requires objects" err + ) +' + + +# Test --batch-command remote-object-info with 'http://' transport and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info http:// fails when transfer.advertiseobjectinfo=false ' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo false && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo true + ) +' + +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + +test_done From 532e3cf469e505fd9eca90441234d96a6081e159 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 26 Feb 2025 16:24:26 +0100 Subject: [PATCH 20/58] reflog: rename `cmd_reflog_expire_cb` to `reflog_expire_options` We're about to expose `struct cmd_reflog_expire_cb` via "reflog.h" so that we can also use this structure in "builtin/gc.c". Once we make it accessible to a wider scope, though, it becomes awkwardly named, as it isn't only useful in the context of a callback. Instead, the structure contains all kinds of options relevant to whether or not a reflog entry should be expired. Rename the structure to `reflog_expire_options` to prepare for this. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/reflog.c | 38 +++++++++++++++++++------------------- reflog.c | 30 +++++++++++++++--------------- reflog.h | 4 ++-- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/builtin/reflog.c b/builtin/reflog.c index 95f264989bbf1a..dee49881d326e2 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -168,7 +168,7 @@ static int reflog_expire_config(const char *var, const char *value, return 0; } -static void set_reflog_expiry_param(struct cmd_reflog_expire_cb *cb, const char *ref) +static void set_reflog_expiry_param(struct reflog_expire_options *cb, const char *ref) { struct reflog_expire_cfg *ent; @@ -207,15 +207,15 @@ static int expire_unreachable_callback(const struct option *opt, const char *arg, int unset) { - struct cmd_reflog_expire_cb *cmd = opt->value; + struct reflog_expire_options *opts = opt->value; BUG_ON_OPT_NEG(unset); - if (parse_expiry_date(arg, &cmd->expire_unreachable)) + if (parse_expiry_date(arg, &opts->expire_unreachable)) die(_("invalid timestamp '%s' given to '--%s'"), arg, opt->long_name); - cmd->explicit_expiry |= EXPIRE_UNREACH; + opts->explicit_expiry |= EXPIRE_UNREACH; return 0; } @@ -223,15 +223,15 @@ static int expire_total_callback(const struct option *opt, const char *arg, int unset) { - struct cmd_reflog_expire_cb *cmd = opt->value; + struct reflog_expire_options *opts = opt->value; BUG_ON_OPT_NEG(unset); - if (parse_expiry_date(arg, &cmd->expire_total)) + if (parse_expiry_date(arg, &opts->expire_total)) die(_("invalid timestamp '%s' given to '--%s'"), arg, opt->long_name); - cmd->explicit_expiry |= EXPIRE_TOTAL; + opts->explicit_expiry |= EXPIRE_TOTAL; return 0; } @@ -276,7 +276,7 @@ static int cmd_reflog_list(int argc, const char **argv, const char *prefix, static int cmd_reflog_expire(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) { - struct cmd_reflog_expire_cb cmd = { 0 }; + struct reflog_expire_options opts = 
{ 0 }; timestamp_t now = time(NULL); int i, status, do_all, single_worktree = 0; unsigned int flags = 0; @@ -292,15 +292,15 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix, N_("update the reference to the value of the top reflog entry"), EXPIRE_REFLOGS_UPDATE_REF), OPT_BOOL(0, "verbose", &verbose, N_("print extra information on screen")), - OPT_CALLBACK_F(0, "expire", &cmd, N_("timestamp"), + OPT_CALLBACK_F(0, "expire", &opts, N_("timestamp"), N_("prune entries older than the specified time"), PARSE_OPT_NONEG, expire_total_callback), - OPT_CALLBACK_F(0, "expire-unreachable", &cmd, N_("timestamp"), + OPT_CALLBACK_F(0, "expire-unreachable", &opts, N_("timestamp"), N_("prune entries older than