From 0cc625f3f5ee377ffb1f498a61026a4cbe089e94 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:23 +0200 Subject: [PATCH 1/3] last-modified: new subcommand to show when files were last modified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to git-blame(1), introduce a new subcommand git-last-modified(1). This command shows the most recent modification to paths in a tree. It does so by expanding the tree at a given commit, taking note of the current state of each path, and then walking backwards through history looking for commits where each path changed into its final commit ID. Based-on-patch-by: Jeff King Improved-by: Ævar Arnfjörð Bjarmason Signed-off-by: Toon Claes --- .gitignore | 1 + Documentation/git-last-modified.adoc | 49 +++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 1 + builtin/last-modified.c | 289 +++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t8020-last-modified.sh | 204 +++++++++++++++++++ 11 files changed, 550 insertions(+) create mode 100644 Documentation/git-last-modified.adoc create mode 100644 builtin/last-modified.c create mode 100755 t/t8020-last-modified.sh diff --git a/.gitignore b/.gitignore index 04c444404e4ba8..a36ee944433574 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,7 @@ /git-init-db /git-interpret-trailers /git-instaweb +/git-last-modified /git-log /git-ls-files /git-ls-remote diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc new file mode 100644 index 00000000000000..89138ebeb7afec --- /dev/null +++ b/Documentation/git-last-modified.adoc @@ -0,0 +1,49 @@ +git-last-modified(1) +==================== + +NAME +---- +git-last-modified - EXPERIMENTAL: Show when files were last modified + + +SYNOPSIS +-------- +[synopsis] +git last-modified [-r] [-t] [<revision-range>] [[--] <path>...] 
+ +DESCRIPTION +----------- + +Shows which commit last modified each of the relevant files and subdirectories. + +THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. + +OPTIONS +------- + +-r:: + Recurse into subtrees. + +-t:: + Show tree entry itself as well as subtrees. Implies `-r`. + +<revision-range>:: + Only traverse commits in the specified revision range. When no + `<revision-range>` is specified, it defaults to `HEAD` (i.e. the whole + history leading to the current commit). For a complete list of ways to + spell `<revision-range>`, see the 'Specifying Ranges' section of + linkgit:gitrevisions[7]. + +[--] <path>...:: + For each _<path>_ given, the commit which last modified it is returned. + Without an optional path parameter, all files and subdirectories + in path traversal are included in the output. + +SEE ALSO +-------- +linkgit:git-blame[1], +linkgit:git-log[1]. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 2fe1a1369d444d..99aeb6d0e00289 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -74,6 +74,7 @@ manpages = { 'git-init.adoc' : 1, 'git-instaweb.adoc' : 1, 'git-interpret-trailers.adoc' : 1, + 'git-last-modified.adoc' : 1, 'git-log.adoc' : 1, 'git-ls-files.adoc' : 1, 'git-ls-remote.adoc' : 1, diff --git a/Makefile b/Makefile index 70d1543b6b8688..11bf4fb55aaf2f 100644 --- a/Makefile +++ b/Makefile @@ -1267,6 +1267,7 @@ BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o +BUILTIN_OBJS += builtin/last-modified.o BUILTIN_OBJS += builtin/log.o BUILTIN_OBJS += builtin/ls-files.o BUILTIN_OBJS += builtin/ls-remote.o diff --git a/builtin.h b/builtin.h index bff13e3069b4af..6ed6759ec4e037 100644 --- a/builtin.h +++ b/builtin.h @@ -176,6 +176,7 @@ int cmd_hook(int argc, const char **argv, const char *prefix, struct repository int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo); int 
cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_interpret_trailers(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log_reflog(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_ls_files(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/last-modified.c b/builtin/last-modified.c new file mode 100644 index 00000000000000..63993bc1c92604 --- /dev/null +++ b/builtin/last-modified.c @@ -0,0 +1,289 @@ +#include "git-compat-util.h" +#include "builtin.h" +#include "commit.h" +#include "config.h" +#include "diff.h" +#include "diffcore.h" +#include "hashmap.h" +#include "hex.h" +#include "log-tree.h" +#include "object-name.h" +#include "object.h" +#include "parse-options.h" +#include "quote.h" +#include "repository.h" +#include "revision.h" + +struct last_modified_entry { + struct hashmap_entry hashent; + struct object_id oid; + const char path[FLEX_ARRAY]; +}; + +static int last_modified_entry_hashcmp(const void *unused UNUSED, + const struct hashmap_entry *hent1, + const struct hashmap_entry *hent2, + const void *path) +{ + const struct last_modified_entry *ent1 = + container_of(hent1, const struct last_modified_entry, hashent); + const struct last_modified_entry *ent2 = + container_of(hent2, const struct last_modified_entry, hashent); + return strcmp(ent1->path, path ? 
path : ent2->path); +} + +struct last_modified { + struct hashmap paths; + struct rev_info rev; + int recursive, tree_in_recursive; +}; + +static void last_modified_release(struct last_modified *lm) +{ + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); + release_revisions(&lm->rev); +} + +typedef void (*last_modified_callback)(const char *path, + const struct commit *commit, void *data); + +struct last_modified_callback_data { + struct commit *commit; + struct hashmap *paths; + + last_modified_callback callback; + void *callback_data; +}; + +static void add_path_from_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *data) +{ + struct last_modified *lm = data; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct last_modified_entry *ent; + const char *path = p->two->path; + + FLEX_ALLOC_STR(ent, path, path); + oidcpy(&ent->oid, &p->two->oid); + hashmap_entry_init(&ent->hashent, strhash(ent->path)); + hashmap_add(&lm->paths, &ent->hashent); + } +} + +static int populate_paths_from_revs(struct last_modified *lm) +{ + int num_interesting = 0; + struct diff_options diffopt; + + memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); + copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); + /* + * Use a callback to populate the paths from revs + */ + diffopt.output_format = DIFF_FORMAT_CALLBACK; + diffopt.format_callback = add_path_from_diff; + diffopt.format_callback_data = lm; + + for (size_t i = 0; i < lm->rev.pending.nr; i++) { + struct object_array_entry *obj = lm->rev.pending.objects + i; + + if (obj->item->flags & UNINTERESTING) + continue; + + if (num_interesting++) + return error(_("can only get last-modified one tree at a time")); + + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &obj->item->oid, "", &diffopt); + diff_flush(&diffopt); + } + diff_free(&diffopt); + + return 0; +} + +static void mark_path(const char *path, const struct object_id *oid, + struct 
last_modified_callback_data *data) +{ + struct last_modified_entry *ent; + + /* Is it even a path that we are interested in? */ + ent = hashmap_get_entry_from_hash(data->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (!ent) + return; + + /* + * Is it arriving at a version of interest, or is it from a side branch + * which did not contribute to the final state? + */ + if (!oideq(oid, &ent->oid)) + return; + + if (data->callback) + data->callback(path, data->commit, data->callback_data); + + hashmap_remove(data->paths, &ent->hashent, path); + free(ent); +} + +static void last_modified_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *cbdata) +{ + struct last_modified_callback_data *data = cbdata; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + switch (p->status) { + case DIFF_STATUS_DELETED: + /* + * There's no point in feeding a deletion, as it could + * not have resulted in our current state, which + * actually has the file. + */ + break; + + default: + /* + * Otherwise, we care only that we somehow arrived at + * a final oid state. Note that this covers some + * potentially controversial areas, including: + * + * 1. A rename or copy will be found, as it is the + * first time the content has arrived at the given + * path. + * + * 2. Even a non-content modification like a mode or + * type change will trigger it. + * + * We take the inclusive approach for now, and find + * anything which impacts the path. Options to tweak + * the behavior (e.g., to "--follow" the content across + * renames) can come later. 
+ */ + mark_path(p->two->path, &p->two->oid, data); + break; + } + } +} + +static int last_modified_run(struct last_modified *lm, + last_modified_callback cb, void *cbdata) +{ + struct last_modified_callback_data data; + + data.paths = &lm->paths; + data.callback = cb; + data.callback_data = cbdata; + + lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; + lm->rev.diffopt.format_callback = last_modified_diff; + lm->rev.diffopt.format_callback_data = &data; + + prepare_revision_walk(&lm->rev); + + while (hashmap_get_size(&lm->paths)) { + data.commit = get_revision(&lm->rev); + if (!data.commit) + break; + + if (data.commit->object.flags & BOUNDARY) { + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &data.commit->object.oid, "", + &lm->rev.diffopt); + diff_flush(&lm->rev.diffopt); + } else { + log_tree_commit(&lm->rev, data.commit); + } + } + + return 0; +} + +static void show_entry(const char *path, const struct commit *commit, void *d) +{ + struct last_modified *lm = d; + + if (commit->object.flags & BOUNDARY) + putchar('^'); + printf("%s\t", oid_to_hex(&commit->object.oid)); + + if (lm->rev.diffopt.line_termination) + write_name_quoted(path, stdout, '\n'); + else + printf("%s%c", path, '\0'); + + fflush(stdout); +} + +static int last_modified_init(struct last_modified *lm, struct repository *r, + const char *prefix, int argc, const char **argv) +{ + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); + + repo_init_revisions(r, &lm->rev, prefix); + lm->rev.def = "HEAD"; + lm->rev.combine_merges = 1; + lm->rev.show_root_diff = 1; + lm->rev.boundary = 1; + lm->rev.no_commit_id = 1; + lm->rev.diff = 1; + lm->rev.diffopt.flags.recursive = lm->recursive || lm->tree_in_recursive; + lm->rev.diffopt.flags.tree_in_recursive = lm->tree_in_recursive; + + if ((argc = setup_revisions(argc, argv, &lm->rev, NULL)) > 1) { + error(_("unknown last-modified argument: %s"), argv[1]); + return argc; + } + + if (populate_paths_from_revs(lm) < 0) + return 
error(_("unable to setup last-modified")); + + return 0; +} + +int cmd_last_modified(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + int ret; + struct last_modified lm; + + const char * const last_modified_usage[] = { + N_("git last-modified [-r] [-t] " + "[] [[--] ...]"), + NULL + }; + + struct option last_modified_options[] = { + OPT_BOOL('r', "recursive", &lm.recursive, + N_("recurse into subtrees")), + OPT_BOOL('t', "tree-in-recursive", &lm.tree_in_recursive, + N_("recurse into subtrees and include the tree entries too")), + OPT_END() + }; + + memset(&lm, 0, sizeof(lm)); + + argc = parse_options(argc, argv, prefix, last_modified_options, + last_modified_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); + + repo_config(repo, git_default_config, NULL); + + if ((ret = last_modified_init(&lm, repo, prefix, argc, argv))) { + if (ret > 0) + usage_with_options(last_modified_usage, + last_modified_options); + goto out; + } + + if ((ret = last_modified_run(&lm, show_entry, &lm))) + goto out; + +out: + last_modified_release(&lm); + + return ret; +} diff --git a/command-list.txt b/command-list.txt index b7ade3ab9f3319..b715777b248a0d 100644 --- a/command-list.txt +++ b/command-list.txt @@ -124,6 +124,7 @@ git-index-pack plumbingmanipulators git-init mainporcelain init git-instaweb ancillaryinterrogators complete git-interpret-trailers purehelpers +git-last-modified plumbinginterrogators git-log mainporcelain info git-ls-files plumbinginterrogators git-ls-remote plumbinginterrogators diff --git a/git.c b/git.c index 07a5fe39fb69f0..76a0b2a1a44d39 100644 --- a/git.c +++ b/git.c @@ -565,6 +565,7 @@ static struct cmd_struct commands[] = { { "init", cmd_init_db }, { "init-db", cmd_init_db }, { "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY }, + { "last-modified", cmd_last_modified, RUN_SETUP }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, 
diff --git a/meson.build b/meson.build index 7fea4a34d684c5..a45608f0a448cf 100644 --- a/meson.build +++ b/meson.build @@ -607,6 +607,7 @@ builtin_sources = [ 'builtin/index-pack.c', 'builtin/init-db.c', 'builtin/interpret-trailers.c', + 'builtin/last-modified.c', 'builtin/log.c', 'builtin/ls-files.c', 'builtin/ls-remote.c', diff --git a/t/meson.build b/t/meson.build index 6d7fe6b117ea4a..eee1863eb34b72 100644 --- a/t/meson.build +++ b/t/meson.build @@ -961,6 +961,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh new file mode 100755 index 00000000000000..921d2a0807d5bb --- /dev/null +++ b/t/t8020-last-modified.sh @@ -0,0 +1,204 @@ +#!/bin/sh + +test_description='last-modified tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit 1 file && + mkdir a && + test_commit 2 a/file && + mkdir a/b && + test_commit 3 a/b/file +' + +test_expect_success 'cannot run last-modified on two trees' ' + test_must_fail git last-modified HEAD HEAD~1 +' + +check_last_modified() { + local indir= && + while test $# != 0 + do + case "$1" in + -C) + indir="$2" + shift + ;; + *) + break + ;; + esac && + shift + done && + + cat >expect && + test_when_finished "rm -f tmp.*" && + git ${indir:+-C "$indir"} last-modified "$@" >tmp.1 && + git name-rev --annotate-stdin --name-only --tags \ + <tmp.1 >tmp.2 && + tr '\t' ' ' <tmp.2 >tmp.3 && + sort tmp.3 >actual && + test_cmp expect actual +} + +test_expect_success 'last-modified non-recursive' ' + check_last_modified <<-\EOF + 1 file + 3 a + EOF +' + +test_expect_success 'last-modified recursive' ' + check_last_modified -r <<-\EOF + 1 file + 2 a/file + 3 a/b/file + EOF +' + +test_expect_success 'last-modified recursive with tree' ' + check_last_modified -t <<-\EOF + 1 file + 2 a/file + 3 a + 3 a/b + 3 a/b/file + EOF +' + 
+test_expect_success 'last-modified subdir' ' + check_last_modified a <<-\EOF + 3 a + EOF +' + +test_expect_success 'last-modified subdir recursive' ' + check_last_modified -r a <<-\EOF + 2 a/file + 3 a/b/file + EOF +' + +test_expect_success 'last-modified from non-HEAD commit' ' + check_last_modified HEAD^ <<-\EOF + 1 file + 2 a + EOF +' + +test_expect_success 'last-modified from subdir defaults to root' ' + check_last_modified -C a <<-\EOF + 1 file + 3 a + EOF +' + +test_expect_success 'last-modified from subdir uses relative pathspecs' ' + check_last_modified -C a -r b <<-\EOF + 3 a/b/file + EOF +' + +test_expect_success 'limit last-modified traversal by count' ' + check_last_modified -1 <<-\EOF + 3 a + ^2 file + EOF +' + +test_expect_success 'limit last-modified traversal by commit' ' + check_last_modified HEAD~2..HEAD <<-\EOF + 3 a + ^1 file + EOF +' + +test_expect_success 'only last-modified files in the current tree' ' + git rm -rf a && + git commit -m "remove a" && + check_last_modified <<-\EOF + 1 file + EOF +' + +test_expect_success 'cross merge boundaries in blaming' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit m1 && + git checkout HEAD^ && + git rm -rf . && + test_commit m2 && + git merge m1 && + check_last_modified <<-\EOF + m1 m1.t + m2 m2.t + EOF +' + +test_expect_success 'last-modified merge for resolved conflicts' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit c1 conflict && + git checkout HEAD^ && + git rm -rf . && + test_commit c2 conflict && + test_must_fail git merge c1 && + test_commit resolved conflict && + check_last_modified conflict <<-\EOF + resolved conflict + EOF +' + + +# Consider `file` with this content through history: +# +# A---B---B-------B---B +# \ / +# C---D +test_expect_success 'last-modified merge ignores content from branch' ' + git checkout HEAD^0 && + git rm -rf . 
&& + test_commit a1 file A && + test_commit a2 file B && + test_commit a3 file C && + test_commit a4 file D && + git checkout a2 && + git merge --no-commit --no-ff a4 && + git checkout a2 -- file && + git merge --continue && + check_last_modified <<-\EOF + a2 file + EOF +' + +# Consider `file` with this content through history: +# +# A---B---B---C---D---B---B +# \ / +# B-------B +test_expect_success 'last-modified merge undoes changes' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit b1 file A && + test_commit b2 file B && + test_commit b3 file C && + test_commit b4 file D && + git checkout b2 && + test_commit b5 file2 2 && + git checkout b4 && + git merge --no-commit --no-ff b5 && + git checkout b2 -- file && + git merge --continue && + check_last_modified <<-\EOF + b2 file + b5 file2 + EOF +' + +test_expect_success 'last-modified complains about unknown arguments' ' + test_must_fail git last-modified --foo 2>err && + grep "unknown last-modified argument: --foo" err +' + +test_done From a017f2c81c2b190602178f4adbf81a756cb773e0 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:24 +0200 Subject: [PATCH 2/3] t/perf: add last-modified perf script This just runs some simple last-modified commands. We already test correctness in the regular suite, so this is just about finding performance regressions from one version to another. 
Based-on-patch-by: Jeff King Signed-off-by: Toon Claes --- t/meson.build | 1 + t/perf/p8020-last-modified.sh | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100755 t/perf/p8020-last-modified.sh diff --git a/t/meson.build b/t/meson.build index eee1863eb34b72..b41dfc41d7dca9 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1154,6 +1154,7 @@ benchmarks = [ 'perf/p7820-grep-engines.sh', 'perf/p7821-grep-engines-fixed.sh', 'perf/p7822-grep-perl-character.sh', + 'perf/p8020-last-modified.sh', 'perf/p9210-scalar.sh', 'perf/p9300-fast-import-export.sh', ] diff --git a/t/perf/p8020-last-modified.sh b/t/perf/p8020-last-modified.sh new file mode 100755 index 00000000000000..a02ec907d42c82 --- /dev/null +++ b/t/perf/p8020-last-modified.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +test_description='last-modified perf tests' +. ./perf-lib.sh + +test_perf_default_repo + +test_perf 'top-level last-modified' ' + git last-modified HEAD +' + +test_perf 'top-level recursive last-modified' ' + git last-modified -r HEAD +' + +test_perf 'subdir last-modified' ' + path=$(git ls-tree HEAD | grep ^040000 | head -n 1 | cut -f2) + git last-modified -r HEAD -- "$path" +' + +test_done From c739a7dbccb0d94cc59ab1004d5db00a8a10dced Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:25 +0200 Subject: [PATCH 3/3] last-modified: use Bloom filters when available Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting. When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to compute the diff. 
If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing it in this case. Comparing the perf test results on git.git: Test HEAD~ HEAD ------------------------------------------------------------------------------------ 8020.1: top-level last-modified 4.49(4.34+0.11) 2.22(2.05+0.09) -50.6% 8020.2: top-level recursive last-modified 5.64(5.45+0.11) 5.62(5.30+0.11) -0.4% 8020.3: subdir last-modified 0.11(0.06+0.04) 0.07(0.03+0.04) -36.4% Based-on-patch-by: Taylor Blau Signed-off-by: Toon Claes --- builtin/last-modified.c | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 63993bc1c92604..466df04fbac57f 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -1,5 +1,7 @@ #include "git-compat-util.h" +#include "bloom.h" #include "builtin.h" +#include "commit-graph.h" #include "commit.h" #include "config.h" #include "diff.h" @@ -17,6 +19,7 @@ struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; + struct bloom_key key; const char path[FLEX_ARRAY]; }; @@ -40,6 +43,12 @@ struct last_modified { static void last_modified_release(struct last_modified *lm) { + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) + clear_bloom_key(&ent->key); + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); } @@ -67,6 +76,9 @@ static void add_path_from_diff(struct diff_queue_struct *q, FLEX_ALLOC_STR(ent, path, path); oidcpy(&ent->oid, &p->two->oid); + if (lm->rev.bloom_filter_settings) + fill_bloom_key(path, strlen(path), &ent->key, + lm->rev.bloom_filter_settings); hashmap_entry_init(&ent->hashent, strhash(ent->path)); hashmap_add(&lm->paths, &ent->hashent); } @@ -126,6 +138,7 @@ static void mark_path(const char *path, const struct 
object_id *oid, data->callback(path, data->commit, data->callback_data); hashmap_remove(data->paths, &ent->hashent, path); + clear_bloom_key(&ent->key); free(ent); } @@ -169,6 +182,28 @@ static void last_modified_diff(struct diff_queue_struct *q, } } + +static int maybe_changed_path(struct last_modified *lm, struct commit *origin) +{ + struct bloom_filter *filter; + struct last_modified_entry *ent; + struct hashmap_iter iter; + + if (!lm->rev.bloom_filter_settings) + return 1; + + filter = get_bloom_filter(lm->rev.repo, origin); + if (!filter) + return 1; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (bloom_filter_contains(filter, &ent->key, + lm->rev.bloom_filter_settings)) + return 1; + } + return 0; +} + static int last_modified_run(struct last_modified *lm, last_modified_callback cb, void *cbdata) { @@ -189,6 +224,9 @@ static int last_modified_run(struct last_modified *lm, if (!data.commit) break; + if (!maybe_changed_path(lm, data.commit)) + continue; + if (data.commit->object.flags & BOUNDARY) { diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, &data.commit->object.oid, "", @@ -238,6 +276,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, return argc; } + /* + * We're not interested in generation numbers here, + * but calling this function to prepare the commit-graph. + */ + (void)generation_numbers_enabled(lm->rev.repo); + lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); + if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified"));