From 92d0cd41a412307cf319337ac6ea7e677975e506 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jul 2025 11:24:00 -0400 Subject: [PATCH 1/8] sparse-checkout: remove use of the_repository The logic for the 'git sparse-checkout' builtin uses the_repository all over the place, despite some use of a repository struct in different method parameters. Complete this removal of the_repository by using 'repo' when possible. In one place, there was already a local variable 'r' that was set to the_repository, so move that to a method parameter. We cannot remove the USE_THE_REPOSITORY_VARIABLE declaration as we are still using global constants for the state of the sparse-checkout. Signed-off-by: Derrick Stolee --- builtin/sparse-checkout.c | 121 ++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 57 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8a0ffba9d4b3bf..61714bf80be044 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -204,12 +204,12 @@ static void clean_tracked_sparse_directories(struct repository *r) ensure_full_index(r->index); } -static int update_working_directory(struct pattern_list *pl) +static int update_working_directory(struct repository *r, + struct pattern_list *pl) { enum update_sparsity_result result; struct unpack_trees_options o; struct lock_file lock_file = LOCK_INIT; - struct repository *r = the_repository; struct pattern_list *old_pl; /* If no branch has been checked out, there are no updates to make. 
*/ @@ -327,7 +327,8 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) string_list_clear(&sl, 0); } -static int write_patterns_and_update(struct pattern_list *pl) +static int write_patterns_and_update(struct repository *repo, + struct pattern_list *pl) { char *sparse_filename; FILE *fp; @@ -336,15 +337,15 @@ static int write_patterns_and_update(struct pattern_list *pl) sparse_filename = get_sparse_checkout_filename(); - if (safe_create_leading_directories(the_repository, sparse_filename)) + if (safe_create_leading_directories(repo, sparse_filename)) die(_("failed to create directory for sparse-checkout file")); hold_lock_file_for_update(&lk, sparse_filename, LOCK_DIE_ON_ERROR); - result = update_working_directory(pl); + result = update_working_directory(repo, pl); if (result) { rollback_lock_file(&lk); - update_working_directory(NULL); + update_working_directory(repo, NULL); goto out; } @@ -372,25 +373,26 @@ enum sparse_checkout_mode { MODE_CONE_PATTERNS = 2, }; -static int set_config(enum sparse_checkout_mode mode) +static int set_config(struct repository *repo, + enum sparse_checkout_mode mode) { /* Update to use worktree config, if not already. */ - if (init_worktree_config(the_repository)) { + if (init_worktree_config(repo)) { error(_("failed to initialize worktree config")); return 1; } - if (repo_config_set_worktree_gently(the_repository, + if (repo_config_set_worktree_gently(repo, "core.sparseCheckout", mode ? "true" : "false") || - repo_config_set_worktree_gently(the_repository, + repo_config_set_worktree_gently(repo, "core.sparseCheckoutCone", mode == MODE_CONE_PATTERNS ? 
"true" : "false")) return 1; if (mode == MODE_NO_PATTERNS) - return set_sparse_index_config(the_repository, 0); + return set_sparse_index_config(repo, 0); return 0; } @@ -410,7 +412,7 @@ static enum sparse_checkout_mode update_cone_mode(int *cone_mode) { return MODE_ALL_PATTERNS; } -static int update_modes(int *cone_mode, int *sparse_index) +static int update_modes(struct repository *repo, int *cone_mode, int *sparse_index) { int mode, record_mode; @@ -418,20 +420,20 @@ static int update_modes(int *cone_mode, int *sparse_index) record_mode = (*cone_mode != -1) || !the_repository->settings.sparse_checkout; mode = update_cone_mode(cone_mode); - if (record_mode && set_config(mode)) + if (record_mode && set_config(repo, mode)) return 1; /* Set sparse-index/non-sparse-index mode if specified */ if (*sparse_index >= 0) { - if (set_sparse_index_config(the_repository, *sparse_index) < 0) + if (set_sparse_index_config(repo, *sparse_index) < 0) die(_("failed to modify sparse-index config")); /* force an index rewrite */ - repo_read_index(the_repository); - the_repository->index->updated_workdir = 1; + repo_read_index(repo); + repo->index->updated_workdir = 1; if (!*sparse_index) - ensure_full_index(the_repository->index); + ensure_full_index(repo->index); } return 0; @@ -448,7 +450,7 @@ static struct sparse_checkout_init_opts { } init_opts; static int sparse_checkout_init(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct pattern_list pl; char *sparse_filename; @@ -464,7 +466,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, }; setup_work_tree(); - repo_read_index(the_repository); + repo_read_index(repo); init_opts.cone_mode = -1; init_opts.sparse_index = -1; @@ -473,7 +475,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, builtin_sparse_checkout_init_options, builtin_sparse_checkout_init_usage, 0); - if 
(update_modes(&init_opts.cone_mode, &init_opts.sparse_index)) + if (update_modes(repo, &init_opts.cone_mode, &init_opts.sparse_index)) return 1; memset(&pl, 0, sizeof(pl)); @@ -485,14 +487,14 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, if (res >= 0) { free(sparse_filename); clear_pattern_list(&pl); - return update_working_directory(NULL); + return update_working_directory(repo, NULL); } - if (repo_get_oid(the_repository, "HEAD", &oid)) { + if (repo_get_oid(repo, "HEAD", &oid)) { FILE *fp; /* assume we are in a fresh repo, but update the sparse-checkout file */ - if (safe_create_leading_directories(the_repository, sparse_filename)) + if (safe_create_leading_directories(repo, sparse_filename)) die(_("unable to create leading directories of %s"), sparse_filename); fp = xfopen(sparse_filename, "w"); @@ -511,7 +513,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, add_pattern("!/*/", empty_base, 0, &pl, 0); pl.use_cone_patterns = init_opts.cone_mode; - return write_patterns_and_update(&pl); + return write_patterns_and_update(repo, &pl); } static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) @@ -674,7 +676,8 @@ static void add_patterns_literal(int argc, const char **argv, add_patterns_from_input(pl, argc, argv, use_stdin ? 
stdin : NULL); } -static int modify_pattern_list(struct strvec *args, int use_stdin, +static int modify_pattern_list(struct repository *repo, + struct strvec *args, int use_stdin, enum modify_type m) { int result; @@ -695,23 +698,24 @@ static int modify_pattern_list(struct strvec *args, int use_stdin, break; } - if (!the_repository->settings.sparse_checkout) { - set_config(MODE_ALL_PATTERNS); - the_repository->settings.sparse_checkout = 1; + if (!repo->settings.sparse_checkout) { + set_config(repo, MODE_ALL_PATTERNS); + repo->settings.sparse_checkout = 1; changed_config = 1; } - result = write_patterns_and_update(pl); + result = write_patterns_and_update(repo, pl); if (result && changed_config) - set_config(MODE_NO_PATTERNS); + set_config(repo, MODE_NO_PATTERNS); clear_pattern_list(pl); free(pl); return result; } -static void sanitize_paths(struct strvec *args, +static void sanitize_paths(struct repository *repo, + struct strvec *args, const char *prefix, int skip_checks) { int i; @@ -752,7 +756,7 @@ static void sanitize_paths(struct strvec *args, for (i = 0; i < args->nr; i++) { struct cache_entry *ce; - struct index_state *index = the_repository->index; + struct index_state *index = repo->index; int pos = index_name_pos(index, args->v[i], strlen(args->v[i])); if (pos < 0) @@ -779,7 +783,7 @@ static struct sparse_checkout_add_opts { } add_opts; static int sparse_checkout_add(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_add_options[] = { OPT_BOOL_F(0, "skip-checks", &add_opts.skip_checks, @@ -796,7 +800,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix, if (!the_repository->settings.sparse_checkout) die(_("no sparse-checkout to add to")); - repo_read_index(the_repository); + repo_read_index(repo); argc = parse_options(argc, argv, prefix, builtin_sparse_checkout_add_options, @@ -804,9 +808,9 @@ static int 
sparse_checkout_add(int argc, const char **argv, const char *prefix, for (int i = 0; i < argc; i++) strvec_push(&patterns, argv[i]); - sanitize_paths(&patterns, prefix, add_opts.skip_checks); + sanitize_paths(repo, &patterns, prefix, add_opts.skip_checks); - ret = modify_pattern_list(&patterns, add_opts.use_stdin, ADD); + ret = modify_pattern_list(repo, &patterns, add_opts.use_stdin, ADD); strvec_clear(&patterns); return ret; @@ -825,7 +829,7 @@ static struct sparse_checkout_set_opts { } set_opts; static int sparse_checkout_set(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int default_patterns_nr = 2; const char *default_patterns[] = {"/*", "!/*/", NULL}; @@ -847,7 +851,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, int ret; setup_work_tree(); - repo_read_index(the_repository); + repo_read_index(repo); set_opts.cone_mode = -1; set_opts.sparse_index = -1; @@ -856,7 +860,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, builtin_sparse_checkout_set_options, builtin_sparse_checkout_set_usage, 0); - if (update_modes(&set_opts.cone_mode, &set_opts.sparse_index)) + if (update_modes(repo, &set_opts.cone_mode, &set_opts.sparse_index)) return 1; /* @@ -870,10 +874,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, } else { for (int i = 0; i < argc; i++) strvec_push(&patterns, argv[i]); - sanitize_paths(&patterns, prefix, set_opts.skip_checks); + sanitize_paths(repo, &patterns, prefix, set_opts.skip_checks); } - ret = modify_pattern_list(&patterns, set_opts.use_stdin, REPLACE); + ret = modify_pattern_list(repo, &patterns, set_opts.use_stdin, REPLACE); strvec_clear(&patterns); return ret; @@ -891,7 +895,7 @@ static struct sparse_checkout_reapply_opts { static int sparse_checkout_reapply(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static 
struct option builtin_sparse_checkout_reapply_options[] = { OPT_BOOL(0, "cone", &reapply_opts.cone_mode, @@ -912,12 +916,12 @@ static int sparse_checkout_reapply(int argc, const char **argv, builtin_sparse_checkout_reapply_options, builtin_sparse_checkout_reapply_usage, 0); - repo_read_index(the_repository); + repo_read_index(repo); - if (update_modes(&reapply_opts.cone_mode, &reapply_opts.sparse_index)) + if (update_modes(repo, &reapply_opts.cone_mode, &reapply_opts.sparse_index)) return 1; - return update_working_directory(NULL); + return update_working_directory(repo, NULL); } static char const * const builtin_sparse_checkout_disable_usage[] = { @@ -927,7 +931,7 @@ static char const * const builtin_sparse_checkout_disable_usage[] = { static int sparse_checkout_disable(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_disable_options[] = { OPT_END(), @@ -955,7 +959,7 @@ static int sparse_checkout_disable(int argc, const char **argv, * are expecting to do that when disabling sparse-checkout. 
*/ give_advice_on_expansion = 0; - repo_read_index(the_repository); + repo_read_index(repo); memset(&pl, 0, sizeof(pl)); hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); @@ -965,13 +969,13 @@ static int sparse_checkout_disable(int argc, const char **argv, add_pattern("/*", empty_base, 0, &pl, 0); - the_repository->settings.sparse_index = 0; + repo->settings.sparse_index = 0; - if (update_working_directory(&pl)) + if (update_working_directory(repo, &pl)) die(_("error while refreshing working directory")); clear_pattern_list(&pl); - return set_config(MODE_NO_PATTERNS); + return set_config(repo, MODE_NO_PATTERNS); } static char const * const builtin_sparse_checkout_check_rules_usage[] = { @@ -986,14 +990,17 @@ static struct sparse_checkout_check_rules_opts { char *rules_file; } check_rules_opts; -static int check_rules(struct pattern_list *pl, int null_terminated) { +static int check_rules(struct repository *repo, + struct pattern_list *pl, + int null_terminated) +{ struct strbuf line = STRBUF_INIT; struct strbuf unquoted = STRBUF_INIT; char *path; int line_terminator = null_terminated ? 0 : '\n'; strbuf_getline_fn getline_fn = null_terminated ? 
strbuf_getline_nul : strbuf_getline; - the_repository->index->sparse_checkout_patterns = pl; + repo->index->sparse_checkout_patterns = pl; while (!getline_fn(&line, stdin)) { path = line.buf; if (!null_terminated && line.buf[0] == '"') { @@ -1005,7 +1012,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) { path = unquoted.buf; } - if (path_in_sparse_checkout(path, the_repository->index)) + if (path_in_sparse_checkout(path, repo->index)) write_name_quoted(path, stdout, line_terminator); } strbuf_release(&line); @@ -1015,7 +1022,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) { } static int sparse_checkout_check_rules(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_check_rules_options[] = { OPT_BOOL('z', NULL, &check_rules_opts.null_termination, @@ -1054,7 +1061,7 @@ static int sparse_checkout_check_rules(int argc, const char **argv, const char * free(sparse_filename); } - ret = check_rules(&pl, check_rules_opts.null_termination); + ret = check_rules(repo, &pl, check_rules_opts.null_termination); clear_pattern_list(&pl); free(check_rules_opts.rules_file); return ret; @@ -1083,8 +1090,8 @@ int cmd_sparse_checkout(int argc, git_config(git_default_config, NULL); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; return fn(argc, argv, prefix, repo); } From 7e8f7c2d6c8c740d42bc6d157fa491b558b9ff6a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jul 2025 11:42:06 -0400 Subject: [PATCH 2/8] sparse-checkout: add basics of 'clean' command When users change their sparse-checkout definitions to add new directories and remove old ones, there may be a few reasons why directories no longer in scope remain (ignored or excluded files still exist, Windows handles are still open, etc.). 
When these files still exist, the sparse index feature notices that a tracked, but sparse, directory still exists on disk and thus the index expands. This causes a performance hit _and_ the advice printed isn't very helpful. Using 'git clean' by itself isn't enough (generally '-dfx' may be needed), and even that may not be sufficient. Add a new subcommand to 'git sparse-checkout' that removes these tracked-but-sparse directories. This necessarily removes all files contained within, including tracked and untracked files. Of particular importance are ignored and excluded files which would normally be ignored even by 'git clean -f' unless the '-x' or '-X' option is provided. This is the most extreme method for doing this, but it works when the sparse-checkout is in cone mode and is expected to rescope based on directories, not files. The current implementation always deletes these sparse directories without warning. This is unacceptable for a released version, but safeguards such as '--force' and '--dry-run' will be added in changes coming immediately after this one. Note that untracked directories within the sparse-checkout remain. Further, directories that contain staged changes or files in merge conflict states are not deleted. This is a detail that is partly hidden by the implementation which relies on collapsing the index to a sparse index in-memory and only deleting directories that are listed as sparse in the index. If a staged change exists, then that entry is not stored as a sparse tree entry and thus remains on-disk until committed or reset. There are some interesting cases around merge conflict resolution, but those will be carefully analyzed in the future. 
Signed-off-by: Derrick Stolee --- Documentation/git-sparse-checkout.adoc | 11 ++++- builtin/sparse-checkout.c | 64 +++++++++++++++++++++++++- t/t1091-sparse-checkout-builtin.sh | 38 +++++++++++++++ 3 files changed, 111 insertions(+), 2 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 529a8edd9c1ed8..6db88f00781d68 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files SYNOPSIS -------- [verse] -'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [<options>] +'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [<options>] DESCRIPTION @@ -111,6 +111,15 @@ flags, with the same meaning as the flags from the `set` command, in order to change which sparsity mode you are using without needing to also respecify all sparsity paths. +'clean':: + Remove all files in tracked directories that are outside of the + sparse-checkout definition. This subcommand requires cone-mode + sparse-checkout to be sure that we know which directories are + both tracked and all contained paths are not in the sparse-checkout. + This command can be used to be sure the sparse index works + efficiently, though it does not require enabling the sparse index + feature via the `index.sparse=true` configuration. + 'disable':: Disable the `core.sparseCheckout` config setting, and restore the working directory to include all files. 
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 61714bf80be044..6fe6ec718fe3bc 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -2,6 +2,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" +#include "abspath.h" #include "config.h" #include "dir.h" #include "environment.h" @@ -23,7 +24,7 @@ static const char *empty_base = ""; static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules) []"), + N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules | clean) []"), NULL }; @@ -924,6 +925,66 @@ static int sparse_checkout_reapply(int argc, const char **argv, return update_working_directory(repo, NULL); } +static char const * const builtin_sparse_checkout_clean_usage[] = { + "git sparse-checkout clean [-n|--dry-run]", + NULL +}; + +static const char *msg_remove = N_("Removing %s\n"); + +static int sparse_checkout_clean(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct strbuf full_path = STRBUF_INIT; + const char *msg = msg_remove; + size_t worktree_len; + + struct option builtin_sparse_checkout_clean_options[] = { + OPT_END(), + }; + + setup_work_tree(); + if (!repo->settings.sparse_checkout) + die(_("must be in a sparse-checkout to clean directories")); + if (!repo->settings.sparse_checkout_cone) + die(_("must be in a cone-mode sparse-checkout to clean directories")); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_clean_options, + builtin_sparse_checkout_clean_usage, 0); + + if (repo_read_index(repo) < 0) + die(_("failed to read index")); + + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) || + repo->index->sparse_index == INDEX_EXPANDED) + die(_("failed to convert index to a sparse index; resolve merge conflicts and try again")); + + strbuf_addstr(&full_path, repo->worktree); + strbuf_addch(&full_path, '/'); + 
worktree_len = full_path.len; + + for (size_t i = 0; i < repo->index->cache_nr; i++) { + struct cache_entry *ce = repo->index->cache[i]; + if (!S_ISSPARSEDIR(ce->ce_mode)) + continue; + strbuf_setlen(&full_path, worktree_len); + strbuf_add(&full_path, ce->name, ce->ce_namelen); + + if (!is_directory(full_path.buf)) + continue; + + printf(msg, ce->name); + + if (remove_dir_recursively(&full_path, 0)) + warning_errno(_("failed to remove '%s'"), ce->name); + } + + strbuf_release(&full_path); + return 0; +} + static char const * const builtin_sparse_checkout_disable_usage[] = { "git sparse-checkout disable", NULL @@ -1079,6 +1140,7 @@ int cmd_sparse_checkout(int argc, OPT_SUBCOMMAND("set", &fn, sparse_checkout_set), OPT_SUBCOMMAND("add", &fn, sparse_checkout_add), OPT_SUBCOMMAND("reapply", &fn, sparse_checkout_reapply), + OPT_SUBCOMMAND("clean", &fn, sparse_checkout_clean), OPT_SUBCOMMAND("disable", &fn, sparse_checkout_disable), OPT_SUBCOMMAND("check-rules", &fn, sparse_checkout_check_rules), OPT_END(), diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index ab3a105ffff253..a48eedf766d2a4 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1050,5 +1050,43 @@ test_expect_success 'check-rules null termination' ' test_cmp expect actual ' +test_expect_success 'clean' ' + git -C repo sparse-checkout set --cone deep/deeper1 && + mkdir repo/deep/deeper2 repo/folder1 && + touch repo/deep/deeper2/file && + touch repo/folder1/file && + + cat >expect <<-\EOF && + Removing deep/deeper2/ + Removing folder1/ + EOF + + git -C repo sparse-checkout clean >out && + test_cmp expect out && + + test_path_is_missing repo/deep/deeper2 && + test_path_is_missing repo/folder1 +' + +test_expect_success 'clean with staged sparse change' ' + git -C repo sparse-checkout set --cone deep/deeper1 && + mkdir repo/deep/deeper2 repo/folder1 repo/folder2 && + touch repo/deep/deeper2/file && + touch repo/folder1/file && + echo 
dirty >repo/folder2/a && + + git -C repo add --sparse folder1/file && + + # deletes deep/deeper2/ but leaves folder1/ and folder2/ + cat >expect <<-\EOF && + Removing deep/deeper2/ + EOF + + git -C repo sparse-checkout clean >out && + test_cmp expect out && + + test_path_is_missing repo/deep/deeper2 && + test_path_exists repo/folder1 +' test_done From 221f3e5fb0c56b75f8fbfa9f4aa34ae93fad0cdb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 15 Jul 2025 10:47:31 -0400 Subject: [PATCH 3/8] sparse-checkout: match some 'clean' behavior The 'git sparse-checkout clean' subcommand is somewhat similar to 'git clean' in that it will delete files that should not be in the worktree. The big difference is that it focuses on the directories that should not be in the worktree due to cone-mode sparse-checkout. It also does not discriminate in the kinds of files and focuses on deleting entire directories. However, there are some restrictions that would be good to bring over from 'git clean', specifically how it refuses to do anything without the '-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce' config can be set to 'false' to imply '--force'. Add this behavior to avoid accidental deletion of files that cannot be recovered from Git. Signed-off-by: Derrick Stolee --- Documentation/git-sparse-checkout.adoc | 9 ++++ builtin/sparse-checkout.c | 15 +++++- t/t1091-sparse-checkout-builtin.sh | 66 +++++++++++++++++++++++++- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 6db88f00781d68..823a66c40bc557 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -119,6 +119,15 @@ all sparsity paths. This command can be used to be sure the sparse index works efficiently, though it does not require enabling the sparse index feature via the `index.sparse=true` configuration. 
++ +To prevent accidental deletion of worktree files, the `clean` subcommand +will not delete any files without the `-f` or `--force` option, unless +the `clean.requireForce` config option is set to `false`. ++ +The `--dry-run` option will list the directories that would be removed +without deleting them. Running in this mode can be helpful to predict the +behavior of the clean command or to determine which kinds of files are left +in the sparse directories. 'disable':: Disable the `core.sparseCheckout` config setting, and restore the diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 6fe6ec718fe3bc..fe332ff5f9413c 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -931,6 +931,7 @@ static char const * const builtin_sparse_checkout_clean_usage[] = { }; static const char *msg_remove = N_("Removing %s\n"); +static const char *msg_would_remove = N_("Would remove %s\n"); static int sparse_checkout_clean(int argc, const char **argv, const char *prefix, @@ -939,8 +940,12 @@ static int sparse_checkout_clean(int argc, const char **argv, struct strbuf full_path = STRBUF_INIT; const char *msg = msg_remove; size_t worktree_len; + int force = 0, dry_run = 0; + int require_force = 1; struct option builtin_sparse_checkout_clean_options[] = { + OPT__DRY_RUN(&dry_run, N_("dry run")), + OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE), OPT_END(), }; @@ -954,6 +959,13 @@ static int sparse_checkout_clean(int argc, const char **argv, builtin_sparse_checkout_clean_options, builtin_sparse_checkout_clean_usage, 0); + repo_config_get_bool(repo, "clean.requireforce", &require_force); + if (require_force && !force && !dry_run) + die(_("for safety, refusing to clean without one of --force or --dry-run")); + + if (dry_run) + msg = msg_would_remove; + if (repo_read_index(repo) < 0) die(_("failed to read index")); @@ -977,7 +989,8 @@ static int sparse_checkout_clean(int argc, const char **argv, printf(msg, ce->name); - if 
(remove_dir_recursively(&full_path, 0)) + if (dry_run <= 0 && + remove_dir_recursively(&full_path, 0)) warning_errno(_("failed to remove '%s'"), ce->name); } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index a48eedf766d2a4..69f5a6dcc6895b 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1056,12 +1056,29 @@ test_expect_success 'clean' ' touch repo/deep/deeper2/file && touch repo/folder1/file && + test_must_fail git -C repo sparse-checkout clean 2>err && + grep "refusing to clean" err && + + git -C repo config clean.requireForce true && + test_must_fail git -C repo sparse-checkout clean 2>err && + grep "refusing to clean" err && + + cat >expect <<-\EOF && + Would remove deep/deeper2/ + Would remove folder1/ + EOF + + git -C repo sparse-checkout clean --dry-run >out && + test_cmp expect out && + test_path_exists repo/deep/deeper2 && + test_path_exists repo/folder1 && + cat >expect <<-\EOF && Removing deep/deeper2/ Removing folder1/ EOF - git -C repo sparse-checkout clean >out && + git -C repo sparse-checkout clean -f >out && test_cmp expect out && test_path_is_missing repo/deep/deeper2 && @@ -1077,16 +1094,61 @@ test_expect_success 'clean with staged sparse change' ' git -C repo add --sparse folder1/file && + cat >expect <<-\EOF && + Would remove deep/deeper2/ + EOF + + git -C repo sparse-checkout clean --dry-run >out && + test_cmp expect out && + test_path_exists repo/deep/deeper2 && + test_path_exists repo/folder1 && + test_path_exists repo/folder2 && + # deletes deep/deeper2/ but leaves folder1/ and folder2/ cat >expect <<-\EOF && Removing deep/deeper2/ EOF + # The previous test case checked the -f option, so + # test the config option in this one. 
+ git -C repo config clean.requireForce false && git -C repo sparse-checkout clean >out && test_cmp expect out && test_path_is_missing repo/deep/deeper2 && - test_path_exists repo/folder1 + test_path_exists repo/folder1 && + test_path_exists repo/folder2 +' + +test_expect_success 'clean with merge conflict status' ' + git clone repo clean-merge && + + echo dirty >clean-merge/deep/deeper2/a && + touch clean-merge/folder2/extra && + + cat >input <<-EOF && + 0 $ZERO_OID folder1/a + 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a + EOF + git -C clean-merge update-index --index-info err && + grep "failed to convert index to a sparse index" err && + + mkdir -p clean-merge/folder1/ && + echo merged >clean-merge/folder1/a && + git -C clean-merge add --sparse folder1/a && + + # deletes folder2/ but leaves staged change in folder1 + # and dirty change in deep/deeper2/ + cat >expect <<-\EOF && + Removing folder2/ + EOF + + git -C clean-merge sparse-checkout clean -f >out && + test_cmp expect out ' test_done From fd9a20a392265a5aaed27301a3b324164bb0af86 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 15 Jul 2025 13:11:07 -0400 Subject: [PATCH 4/8] dir: add generic "walk all files" helper There is sometimes a need to visit every file within a directory, recursively. The main example is remove_dir_recursively(), though it has some extra flags that make it want to iterate over paths in a custom way. There is also the fill_directory() approach but that involves an index and a pathspec. This change adds a new for_each_file_in_dir() method that will be helpful in the next change. 
Signed-off-by: Derrick Stolee --- dir.c | 28 ++++++++++++++++++++++++++++ dir.h | 14 ++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/dir.c b/dir.c index d2b0a5aef6705e..2e567ff92746b1 100644 --- a/dir.c +++ b/dir.c @@ -30,6 +30,7 @@ #include "read-cache-ll.h" #include "setup.h" #include "sparse-index.h" +#include "strbuf.h" #include "submodule-config.h" #include "symlinks.h" #include "trace2.h" @@ -87,6 +88,33 @@ struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp) return e; } +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data) +{ + struct dirent *e; + int res = 0; + size_t baselen = path->len; + DIR *dir = opendir(path->buf); + + if (!dir) + return 0; + + while (!res && (e = readdir_skip_dot_and_dotdot(dir)) != NULL) { + unsigned char dtype = get_dtype(e, path, 0); + strbuf_setlen(path, baselen); + strbuf_addstr(path, e->d_name); + + if (dtype == DT_REG) { + res = fn(path->buf, data); + } else if (dtype == DT_DIR) { + strbuf_addch(path, '/'); + res = for_each_file_in_dir(path, fn, data); + } + } + + closedir(dir); + return res; +} + int count_slashes(const char *s) { int cnt = 0; diff --git a/dir.h b/dir.h index d7e71aa8daa7d8..f4235cc12a2fe2 100644 --- a/dir.h +++ b/dir.h @@ -536,6 +536,20 @@ int get_sparse_checkout_patterns(struct pattern_list *pl); */ int remove_dir_recursively(struct strbuf *path, int flag); +/* + * This function pointer type is called on each file discovered in + * for_each_file_in_dir. The iteration stops if this method returns + * non-zero. + */ +typedef int (*file_iterator)(const char *path, const void *data); + +struct strbuf; +/* + * Given a directory path, recursively visit each file within, including + * within subdirectories. + */ +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data); + /* * Tries to remove the path, along with leading empty directories so long as * those empty directories are not startup_info->original_cwd. 
Ignores From f464bb5ed6be91940c3abb54b77cb7b9d893bd67 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 15 Jul 2025 13:15:32 -0400 Subject: [PATCH 5/8] sparse-checkout: add --verbose option to 'clean' The 'git sparse-checkout clean' subcommand is focused on directories, deleting any tracked sparse directories to clean up the worktree and make the sparse index feature work optimally. However, this directory-focused approach can leave users wondering why those directories exist at all. In my experience, these files are left over due to ignore or exclude patterns, Windows file handles, or possibly merge conflict resolutions. Add a new '--verbose' option for users to see all the files that are being deleted (with '--force') or would be deleted (with '--dry-run'). Signed-off-by: Derrick Stolee --- Documentation/git-sparse-checkout.adoc | 5 +++++ builtin/sparse-checkout.c | 28 ++++++++++++++++++++++++-- t/t1091-sparse-checkout-builtin.sh | 14 ++++++++++--- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 823a66c40bc557..604f53f77caf4c 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -128,6 +128,11 @@ The `--dry-run` option will list the directories that would be removed without deleting them. Running in this mode can be helpful to predict the behavior of the clean command or to determine which kinds of files are left in the sparse directories. ++ +The `--verbose` option will list every file within the directories that +are considered for removal. This option is helpful to determine if those +files are actually important or perhaps to explain why the directory is +still present despite the current sparse-checkout. 
'disable':: Disable the `core.sparseCheckout` config setting, and restore the diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index fe332ff5f9413c..f38a0809c09842 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -930,6 +930,26 @@ static char const * const builtin_sparse_checkout_clean_usage[] = { NULL }; +static int list_file_iterator(const char *path, const void *data) +{ + const char *msg = data; + + printf(msg, path); + return 0; +} + +static void list_every_file_in_dir(const char *msg, + const char *directory) +{ + struct strbuf path = STRBUF_INIT; + + strbuf_addstr(&path, directory); + fprintf(stderr, "list every file in %s\n", directory); + + for_each_file_in_dir(&path, list_file_iterator, msg); + strbuf_release(&path); +} + static const char *msg_remove = N_("Removing %s\n"); static const char *msg_would_remove = N_("Would remove %s\n"); @@ -940,12 +960,13 @@ static int sparse_checkout_clean(int argc, const char **argv, struct strbuf full_path = STRBUF_INIT; const char *msg = msg_remove; size_t worktree_len; - int force = 0, dry_run = 0; + int force = 0, dry_run = 0, verbose = 0; int require_force = 1; struct option builtin_sparse_checkout_clean_options[] = { OPT__DRY_RUN(&dry_run, N_("dry run")), OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE), + OPT__VERBOSE(&verbose, N_("report each affected file, not just directories")), OPT_END(), }; @@ -987,7 +1008,10 @@ static int sparse_checkout_clean(int argc, const char **argv, if (!is_directory(full_path.buf)) continue; - printf(msg, ce->name); + if (verbose) + list_every_file_in_dir(msg, ce->name); + else + printf(msg, ce->name); if (dry_run <= 0 && remove_dir_recursively(&full_path, 0)) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 69f5a6dcc6895b..9a89b902c3f58d 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1052,9 +1052,9 @@ test_expect_success 'check-rules null 
termination' ' test_expect_success 'clean' ' git -C repo sparse-checkout set --cone deep/deeper1 && - mkdir repo/deep/deeper2 repo/folder1 && + mkdir -p repo/deep/deeper2 repo/folder1/extra/inside && touch repo/deep/deeper2/file && - touch repo/folder1/file && + touch repo/folder1/extra/inside/file && test_must_fail git -C repo sparse-checkout clean 2>err && grep "refusing to clean" err && @@ -1071,7 +1071,15 @@ test_expect_success 'clean' ' git -C repo sparse-checkout clean --dry-run >out && test_cmp expect out && test_path_exists repo/deep/deeper2 && - test_path_exists repo/folder1 && + test_path_exists repo/folder1/extra/inside/file && + + cat >expect <<-\EOF && + Would remove deep/deeper2/file + Would remove folder1/extra/inside/file + EOF + + git -C repo sparse-checkout clean --dry-run --verbose >out && + test_cmp expect out && cat >expect <<-\EOF && Removing deep/deeper2/ From d6dbc0b5ca9cdcd5956e02a0fe603b170dbae1ce Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jul 2025 11:55:46 -0400 Subject: [PATCH 6/8] sparse-index: point users to new 'clean' action In my experience, the most-common reason that the sparse index must expand to a full one is because there is some leftover file in a tracked directory that is now outside of the sparse-checkout. The new 'git sparse-checkout clean' command will find and delete these directories, so point users to it when they hit the sparse index expansion advice. Signed-off-by: Derrick Stolee --- sparse-index.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sparse-index.c b/sparse-index.c index ff33b8516b9f3d..cbf2bf618c37dc 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -31,7 +31,8 @@ int give_advice_on_expansion = 1; "Your working directory likely has contents that are outside of\n" \ "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \ "see your sparse-checkout definition and compare it to your working\n" \ - "directory contents. 
Running 'git clean' may assist in this cleanup." + "directory contents. Running 'git sparse-checkout clean' may assist\n" \ + "in this cleanup." struct modify_index_context { struct index_state *write; From 0b1a2895b9018fe15a931a097ca18fed8d9c7676 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 16 Jul 2025 10:59:11 -0400 Subject: [PATCH 7/8] t: expand tests around sparse merges and clean With the current implementation of 'git sparse-checkout clean', we notice that a file that was in a conflicted state does not get cleaned up because of some internal details around the SKIP_WORKTREE bit. This test is documenting the current behavior before we update it in the following change. Signed-off-by: Derrick Stolee --- t/t1091-sparse-checkout-builtin.sh | 56 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 9a89b902c3f58d..116ad7c9a20e7a 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1128,35 +1128,47 @@ test_expect_success 'clean with staged sparse change' ' test_path_exists repo/folder2 ' -test_expect_success 'clean with merge conflict status' ' - git clone repo clean-merge && +test_expect_success 'sparse-checkout operations with merge conflicts' ' + git clone repo merge && - echo dirty >clean-merge/deep/deeper2/a && - touch clean-merge/folder2/extra && + ( + cd merge && + mkdir -p folder1/even/more/dirs && + echo base >folder1/even/more/dirs/file && + git add folder1 && + git commit -m "base" && - cat >input <<-EOF && - 0 $ZERO_OID folder1/a - 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a - EOF - git -C clean-merge update-index --index-info <input && + git checkout -b right && + echo right >folder1/even/more/dirs/file && + git commit -a -m "right" && - git -C clean-merge sparse-checkout set deep/deeper1 && + git checkout -b left HEAD~1 && + echo left >folder1/even/more/dirs/file && + git commit -a -m "left" && - test_must_fail
git -C clean-merge sparse-checkout clean -f 2>err && - grep "failed to convert index to a sparse index" err && + git checkout -b merge && + git sparse-checkout set deep/deeper1 && - mkdir -p clean-merge/folder1/ && - echo merged >clean-merge/folder1/a && - git -C clean-merge add --sparse folder1/a && + test_must_fail git merge -m "will-conflict" right && - # deletes folder2/ but leaves staged change in folder1 - # and dirty change in deep/deeper2/ - cat >expect <<-\EOF && - Removing folder2/ - EOF + test_must_fail git sparse-checkout clean -f 2>err && + grep "failed to convert index to a sparse index" err && - git -C clean-merge sparse-checkout clean -f >out && - test_cmp expect out + echo merged >folder1/even/more/dirs/file && + git add --sparse folder1 && + git merge --continue && + + test_path_exists folder1/even/more/dirs/file && + + # clean does not remove the file, because the + # SKIP_WORKTREE bit was not cleared by the merge command. + git sparse-checkout clean -f >out && + test_line_count = 0 out && + test_path_exists folder1/even/more/dirs/file && + + git sparse-checkout reapply && + test_path_is_missing folder1 + ) ' test_done From 82c24ce51980d85e1a53e746b462397e6e6c908a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 16 Jul 2025 19:44:34 -0400 Subject: [PATCH 8/8] sparse-checkout: make 'clean' clear more files The 'git sparse-checkout clean' command is designed to be a one-command way to get the worktree in a state such that a sparse index would operate efficiently. The previous change demonstrated that files outside the sparse-checkout that were committed due to a merge conflict would persist despite attempts to run 'git sparse-checkout clean' and instead a 'git sparse-checkout reapply' would be required. Instead of requiring users to run both commands, update 'clean' to be more ruthless about tracked sparse directories. 
The key here is to make sure that the SKIP_WORKTREE bit is removed from more paths in the index using update_sparsity() before compressing the index to a sparse one in-memory. The tricky part here is that update_sparsity() was previously assuming that it would be in 'update' mode and would change the worktree as it made changes. However, we do not want to make these worktree changes at this point, instead relying on our later logic (that integrates with --dry-run and --verbose options) to perform those steps. One side-effect here is that we also clear out staged files that exist in the worktree, but they would also appear in the verbose output as part of the dry run. The final test in t1091 demonstrates that we no longer need the 'reapply' subcommand for merge resolutions. It also fixes an earlier case where 'git add --sparse' clears the SKIP_WORKTREE bit and avoids a directory deletion. Signed-off-by: Derrick Stolee --- builtin/sparse-checkout.c | 8 ++++++++ t/t1091-sparse-checkout-builtin.sh | 24 +++++++++++++++++------- unpack-trees.c | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index f38a0809c09842..1d1d5208a3ba33 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -962,6 +962,7 @@ static int sparse_checkout_clean(int argc, const char **argv, size_t worktree_len; int force = 0, dry_run = 0, verbose = 0; int require_force = 1; + struct unpack_trees_options o = { 0 }; struct option builtin_sparse_checkout_clean_options[] = { OPT__DRY_RUN(&dry_run, N_("dry run")), @@ -990,6 +991,13 @@ static int sparse_checkout_clean(int argc, const char **argv, if (repo_read_index(repo) < 0) die(_("failed to read index")); + o.verbose_update = verbose; + o.update = 0; /* skip modifying the worktree here. 
*/ + o.head_idx = -1; + o.src_index = o.dst_index = repo->index; + if (update_sparsity(&o, NULL)) + warning(_("failed to reapply sparse-checkout patterns")); + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) || repo->index->sparse_index == INDEX_EXPANDED) die(_("failed to convert index to a sparse index; resolve merge conflicts and try again")); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 116ad7c9a20e7a..4b9078d90a616c 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1104,6 +1104,7 @@ test_expect_success 'clean with staged sparse change' ' cat >expect <<-\EOF && Would remove deep/deeper2/ + Would remove folder1/ EOF git -C repo sparse-checkout clean --dry-run >out && @@ -1115,6 +1116,7 @@ test_expect_success 'clean with staged sparse change' ' # deletes deep/deeper2/ but leaves folder1/ and folder2/ cat >expect <<-\EOF && Removing deep/deeper2/ + Removing folder1/ EOF # The previous test case checked the -f option, so @@ -1124,7 +1126,7 @@ test_expect_success 'clean with staged sparse change' ' test_cmp expect out && test_path_is_missing repo/deep/deeper2 && - test_path_exists repo/folder1 && + test_path_is_missing repo/folder1 && test_path_exists repo/folder2 ' @@ -1147,7 +1149,11 @@ test_expect_success 'sparse-checkout operations with merge conflicts' ' git commit -a -m "left" && git checkout -b merge && - git sparse-checkout set deep/deeper1 && + + touch deep/deeper2/extra && + git sparse-checkout set deep/deeper1 2>err && + grep "contains untracked files" err && + test_path_exists deep/deeper2/extra && test_must_fail git merge -m "will-conflict" right && @@ -1159,15 +1165,19 @@ test_expect_success 'sparse-checkout operations with merge conflicts' ' git merge --continue && test_path_exists folder1/even/more/dirs/file && + test_path_exists deep/deeper2/extra && + + cat >expect <<-\EOF && + Removing deep/deeper2/ + Removing folder1/ + EOF # clean does not remove 
the file, because the # SKIP_WORKTREE bit was not cleared by the merge command. git sparse-checkout clean -f >out && - test_line_count = 0 out && - test_path_exists folder1/even/more/dirs/file && - - git sparse-checkout reapply && - test_path_is_missing folder1 + test_cmp expect out && + test_path_is_missing folder1 && + test_path_is_missing deep/deeper2 ) ' diff --git a/unpack-trees.c b/unpack-trees.c index 0e9813bddf048e..b8814af1b07c0d 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2138,7 +2138,7 @@ enum update_sparsity_result update_sparsity(struct unpack_trees_options *o, index_state_init(&o->internal.result, o->src_index->repo); /* Sanity checks */ - if (!o->update || o->index_only || o->skip_sparse_checkout) + if (o->index_only || o->skip_sparse_checkout) BUG("update_sparsity() is for reflecting sparsity patterns in working directory"); if (o->src_index != o->dst_index || o->fn) BUG("update_sparsity() called wrong");