From 44351ad3d8995dea04f4197b77e3794ab99b1e50 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 20 Jan 2021 23:59:55 +0100 Subject: [PATCH 1/5] range-diff: support reading mbox files Internally, the `git range-diff` command spawns a `git log` process and parses its output for the given commit ranges. This works well when the patches that need to be compared are present in the local repository in the form of commits. In scenarios where that is not the case, the `range-diff` command is currently less helpful. The Git mailing list is such a scenario: Instead of using Git to exchange commits, the patches are sent there as plain-text and no commit range can be specified to let `range-diff` consume those patches. Instead, the expectation is to download the mails, apply them locally and then use `range-diff`. This can be quite cumbersome e.g. when a suitable base revision has to be found first where the patch applies cleanly. Let's offer a way to read those patches from pre-prepared MBox files instead when an argument "mbox:" is passed instead of a commit range. For extra convenience, interpret the filename `-` as standard input. This makes it easy to compare contributions on the mailing list with the actual commits that were integrated into Git's main branch. Example: commit=5c4003ca3f0e9ac6d3c8aa3e387ff843bd440411 mid=bdfa3845b81531863941e6a97c28eb1afa62dd2c.1489435755.git.johannes.schindelin@gmx.de curl -s https://lore.kernel.org/git/$mid/raw | git range-diff mbox:- $commit^! This addresses https://github.com/gitgitgadget/git/issues/207 Signed-off-by: Johannes Schindelin --- Documentation/git-range-diff.adoc | 3 +- range-diff.c | 332 +++++++++++++++++++++++++++++- t/t3206-range-diff.sh | 27 +++ 3 files changed, 360 insertions(+), 2 deletions(-) diff --git a/Documentation/git-range-diff.adoc b/Documentation/git-range-diff.adoc index db0e4279b52847..5b709b65a16b3b 100644 --- a/Documentation/git-range-diff.adoc +++ b/Documentation/git-range-diff.adoc @@ -38,7 +38,8 @@ There are three ways to specify the commit ranges: - ` `: Either commit range can be of the form `..`, `^!` or `^-`. See `SPECIFYING RANGES` - in linkgit:gitrevisions[7] for more details. + in linkgit:gitrevisions[7] for more details. Alternatively, the + patches can be provided as an mbox-formatted file via `mbox:`. - `...`. This is equivalent to `.. ..`. diff --git a/range-diff.c b/range-diff.c index 8a2dcbee322e72..ef8fe9cc55635c 100644 --- a/range-diff.c +++ b/range-diff.c @@ -20,6 +20,8 @@ #include "userdiff.h" #include "apply.h" #include "revision.h" +#include "dir.h" +#include "hex.h" struct patch_util { /* For the search for an exact match */ @@ -34,6 +36,306 @@ struct patch_util { struct object_id oid; }; +static inline int strtost(char const *s, size_t *result, const char **end) +{ + unsigned long u; + char *p; + + /* negative values would be accepted by strtoul */ + if (!isdigit(*s)) + return -1; + errno = 0; + u = strtoul(s, &p, 10); + if (errno || p == s) + return -1; + if (result) + *result = u; + *end = p; + + return 0; +} + +static int parse_hunk_header(const char *p, + size_t *old_count, size_t *new_count, + const char **end) +{ + size_t o = 1, n = 1; + + if (!skip_prefix(p, "@@ -", &p) || + strtost(p, NULL, &p) || + /* The range is -[,], defaulting to count = 1 */ + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &o, &p))) || + !skip_prefix(p, " +", &p) || + strtost(p, NULL, &p) || + /* The range is +[,], defaulting to count = 1 */ + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &n, &p))) || + !skip_prefix(p, " @@", &p)) + return -1; + + *old_count = o; + *new_count = n; + *end = p; + + return 0; +} + +/* + * This function finds the end of the line, replaces the newline character with + * a NUL, and returns the offset of the start of the next line. + * + * If no newline character was found, it returns the offset of the trailing NUL + * instead. + */ +static inline int find_next_line(const char *line, size_t size) +{ + char *eol; + + eol = memchr(line, '\n', size); + if (!eol) + return size; + + *eol = '\0'; + + return eol + 1 - line; +} + +static int read_mbox(const char *path, struct string_list *list) +{ + struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; + struct patch_util *util = NULL; + enum { + MBOX_BEFORE_HEADER, + MBOX_IN_HEADER, + MBOX_IN_COMMIT_MESSAGE, + MBOX_AFTER_TRIPLE_DASH, + MBOX_IN_DIFF + } state = MBOX_BEFORE_HEADER; + char *line, *current_filename = NULL; + int len; + size_t size, old_count = 0, new_count = 0; + const char *author = NULL, *subject = NULL; + + if (!strcmp(path, "-")) { + if (strbuf_read(&contents, STDIN_FILENO, 0) < 0) + return error_errno(_("could not read stdin")); + } else if (strbuf_read_file(&contents, path, 0) < 0) + return error_errno(_("could not read '%s'"), path); + + line = contents.buf; + size = contents.len; + for (; size; size -= len, line += len) { + const char *p; + + len = find_next_line(line, size); + + if (state == MBOX_BEFORE_HEADER) { +parse_from_delimiter: + if (!skip_prefix(line, "From ", &p)) + continue; + + if (util) + BUG("util already allocated"); + util = xcalloc(1, sizeof(*util)); + if (get_oid_hex(p, &util->oid) < 0) + oidcpy(&util->oid, null_oid(the_hash_algo)); + util->matching = -1; + author = subject = NULL; + + state = MBOX_IN_HEADER; + continue; + } + + if (starts_with(line, "diff --git ")) { + struct patch patch = { 0 }; + struct strbuf root = STRBUF_INIT; + int linenr = 0; + int orig_len; + + if (len == size) { + error(_("incomplete diff header: '%s'"), line); +fail: + release_patch(&patch); + free(util); + free(current_filename); + string_list_clear(list, 1); + strbuf_release(&buf); + strbuf_release(&contents); + return -1; + } + + if (line[len - 2] == '\r') { + error(_("cannot handle diff headers with " + "CR/LF line endings")); + goto fail; + } + + state = MBOX_IN_DIFF; + old_count = new_count = 0; + strbuf_addch(&buf, '\n'); + if (!util->diff_offset) + util->diff_offset = buf.len; + + orig_len = len; + /* `find_next_line()`'s replaced the LF with a NUL */ + line[len - 1] = '\n'; + len = parse_git_diff_header(&root, &linenr, 1, line, + len, size, &patch); + if (len < 0) { + error(_("could not parse git header '%.*s'"), + orig_len, line); + goto fail; + } + + if (patch.old_name) + skip_prefix(patch.old_name, "a/", + (const char **)&patch.old_name); + if (patch.new_name) + skip_prefix(patch.new_name, "b/", + (const char **)&patch.new_name); + + strbuf_addstr(&buf, " ## "); + if (patch.is_new) + strbuf_addf(&buf, "%s (new)", patch.new_name); + else if (patch.is_delete) + strbuf_addf(&buf, "%s (deleted)", patch.old_name); + else if (patch.is_rename) + strbuf_addf(&buf, "%s => %s", patch.old_name, patch.new_name); + else + strbuf_addstr(&buf, patch.new_name); + + free(current_filename); + if (patch.is_delete) + current_filename = xstrdup(patch.old_name); + else + current_filename = xstrdup(patch.new_name); + + if (patch.new_mode && patch.old_mode && + patch.old_mode != patch.new_mode) + strbuf_addf(&buf, " (mode change %06o => %06o)", + patch.old_mode, patch.new_mode); + + strbuf_addstr(&buf, " ##\n"); + util->diffsize++; + release_patch(&patch); + } else if (state == MBOX_IN_HEADER) { + if (!line[0]) { + state = MBOX_IN_COMMIT_MESSAGE; + /* Look for an in-body From: */ + if (skip_prefix(line + 1, "From: ", &p)) { + size -= p - line; + line += p - line; + len = find_next_line(line, size); + + while (isspace(*p)) + p++; + author = p; + } + strbuf_addstr(&buf, " ## Metadata ##\n"); + if (author) + strbuf_addf(&buf, "Author: %s\n", author); + strbuf_addstr(&buf, "\n ## Commit message ##\n"); + if (subject) + strbuf_addf(&buf, " %s\n\n", subject); + } else if (skip_prefix(line, "From: ", &p)) { + while (isspace(*p)) + p++; + author = p; + } else if (skip_prefix(line, "Subject: ", &p)) { + const char *q; + + while (isspace(*p)) + p++; + subject = p; + + if (starts_with(p, "[PATCH") && + (q = strchr(p, ']'))) { + q++; + while (isspace(*q)) + q++; + subject = q; + } + } + } else if (state == MBOX_IN_COMMIT_MESSAGE) { + if (!line[0]) { + strbuf_addch(&buf, '\n'); + } else if (strcmp(line, "---")) { + int tabs = 0; + + /* simulate tab expansion */ + while (line[tabs] == '\t') + tabs++; + strbuf_addf(&buf, "%*s%s\n", + 4 + 8 * tabs, "", line + tabs); + } else { + /* + * Trim the trailing newline that is added + * by `format-patch`. + */ + strbuf_trim_trailing_newline(&buf); + state = MBOX_AFTER_TRIPLE_DASH; + } + } else if (state == MBOX_IN_DIFF) { + switch (line[0]) { + case '\0': + /* diff might be followed by empty lines */ + if (old_count || new_count) + warning(_("diff ended %d/%d lines early"), + (int)old_count, (int)new_count); + break; + case '+': + case '-': + case ' ': + /* A `-- ` line indicates the end of a diff */ + if (!old_count && !new_count) + break; + if (old_count && line[0] != '+') + old_count--; + if (new_count && line[0] != '-') + new_count--; + /* fallthrough */ + case '\\': + strbuf_addstr(&buf, line); + strbuf_addch(&buf, '\n'); + util->diffsize++; + continue; + case '@': + if (parse_hunk_header(line, &old_count, + &new_count, &p)) + break; + + strbuf_addstr(&buf, "@@"); + if (current_filename && *p) + strbuf_addf(&buf, " %s:", + current_filename); + strbuf_addstr(&buf, p); + strbuf_addch(&buf, '\n'); + util->diffsize++; + continue; + } + + if (util) { + string_list_append(list, buf.buf)->util = util; + util = NULL; + strbuf_reset(&buf); + } + state = MBOX_BEFORE_HEADER; + goto parse_from_delimiter; + } + } + strbuf_release(&contents); + + if (util) { + if (state == MBOX_IN_DIFF) + string_list_append(list, buf.buf)->util = util; + else + free(util); + } + strbuf_release(&buf); + free(current_filename); + + return 0; +} + /* * Reads the patches into a string list, with the `util` field being populated * as struct object_id (will need to be free()d). @@ -50,6 +352,10 @@ static int read_patches(const char *range, struct string_list *list, ssize_t len; size_t size; int ret = -1; + const char *path; + + if (skip_prefix(range, "mbox:", &path)) + return read_mbox(path, list); strvec_pushl(&cp.args, "log", "--no-color", "-p", "--reverse", "--date-order", "--decorate=no", @@ -450,6 +756,19 @@ static void output_pair_header(struct diff_options *diffopt, strbuf_addch(buf, ' '); pp_commit_easy(CMIT_FMT_ONELINE, commit, buf); + } else { + struct patch_util *util = b_util ? b_util : a_util; + const char *needle = "\n ## Commit message ##\n"; + const char *p = !util || !util->patch ? + NULL : strstr(util->patch, needle); + if (p) { + if (status == '!') + strbuf_addf(buf, "%s%s", color_reset, color); + + strbuf_addch(buf, ' '); + p += strlen(needle); + strbuf_add(buf, p, strchrnul(p, '\n') - p); + } } strbuf_addf(buf, "%s\n", color_reset); @@ -583,6 +902,9 @@ int show_range_diff(const char *range1, const char *range2, if (range_diff_opts->left_only && range_diff_opts->right_only) res = error(_("options '%s' and '%s' cannot be used together"), "--left-only", "--right-only"); + if (!strcmp(range1, "mbox:-") && !strcmp(range2, "mbox:-")) + res = error(_("only one mbox can be read from stdin")); + if (!res && read_patches(range1, &branch1, range_diff_opts->other_arg, include_merges)) res = error(_("could not parse log for '%s'"), range1); if (!res && read_patches(range2, &branch2, range_diff_opts->other_arg, include_merges)) @@ -604,10 +926,18 @@ int show_range_diff(const char *range1, const char *range2, int is_range_diff_range(const char *arg) { char *copy = xstrdup(arg); /* setup_revisions() modifies it */ - const char *argv[] = { "", copy, "--", NULL }; + const char *argv[] = { "", copy, "--", NULL }, *path; int i, positive = 0, negative = 0; struct rev_info revs; + if (skip_prefix(arg, "mbox:", &path)) { + free(copy); + if (!strcmp(path, "-") || file_exists(path)) + return 1; + error_errno(_("not an mbox: '%s'"), path); + return 0; + } + repo_init_revisions(the_repository, &revs, NULL); if (setup_revisions(3, argv, &revs, NULL) == 1) { for (i = 0; i < revs.pending.nr; i++) diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh index e091df6d01da90..0cea9b5c774ca4 100755 --- a/t/t3206-range-diff.sh +++ b/t/t3206-range-diff.sh @@ -857,6 +857,33 @@ test_expect_success 'ranges with pathspecs' ' ! grep "$topic_oid" actual ' +test_expect_success 'compare range vs mbox' ' + git format-patch --stdout topic..mode-only-change >mbox && + git range-diff topic...mode-only-change >expect && + + git range-diff mode-only-change..topic mbox:./mbox >actual && + test_cmp expect actual && + + sed -e "/^From: .*/{ + h + s/.*/From: Bugs Bunny / + :1 + N + /[ -z]$/b1 + G + }" mbox.from && + git range-diff mode-only-change..topic mbox:./mbox.from >actual.from && + test_cmp expect actual.from && + + append_cr mbox.cr && + test_must_fail git range-diff \ + mode-only-change..topic mbox:./mbox.cr 2>err && + grep CR/LF err && + + git range-diff mode-only-change..topic mbox:- actual.stdin && + test_cmp expect actual.stdin +' + test_expect_success 'submodule changes are shown irrespective of diff.submodule' ' git init sub-repo && test_commit -C sub-repo sub-first && From ba7181d6eace583c9e14e9beea089960cb660a21 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 6 Dec 2022 16:51:51 +0100 Subject: [PATCH 2/5] fixup! range-diff: optionally accept pathspecs Undo in-body handling; will redo in a moment. Signed-off-by: Johannes Schindelin --- range-diff.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/range-diff.c b/range-diff.c index ef8fe9cc55635c..ae7580db40b560 100644 --- a/range-diff.c +++ b/range-diff.c @@ -220,16 +220,6 @@ static int read_mbox(const char *path, struct string_list *list) } else if (state == MBOX_IN_HEADER) { if (!line[0]) { state = MBOX_IN_COMMIT_MESSAGE; - /* Look for an in-body From: */ - if (skip_prefix(line + 1, "From: ", &p)) { - size -= p - line; - line += p - line; - len = find_next_line(line, size); - - while (isspace(*p)) - p++; - author = p; - } strbuf_addstr(&buf, " ## Metadata ##\n"); if (author) strbuf_addf(&buf, "Author: %s\n", author); From 941994110414800d8107eafb717d4df82f15d0d8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 6 Dec 2022 17:13:55 +0100 Subject: [PATCH 3/5] range-diff(mbox): grok long header lines The mbox format allows for long header values to be broken apart into 78-column wide chunks. Parse those, so that even long commit titles are handled correctly. Signed-off-by: Johannes Schindelin --- range-diff.c | 84 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/range-diff.c b/range-diff.c index ae7580db40b560..b3771be20eec82 100644 --- a/range-diff.c +++ b/range-diff.c @@ -55,7 +55,7 @@ static inline int strtost(char const *s, size_t *result, const char **end) return 0; } -static int parse_hunk_header(const char *p, +static int parse_line_range(const char *p, size_t *old_count, size_t *new_count, const char **end) { @@ -99,9 +99,62 @@ static inline int find_next_line(const char *line, size_t size) return eol + 1 - line; } +static void skip_whitespace(const char **p) +{ + while (isspace(**p)) + (*p)++; +} + +static void skip_patch_prefix(const char **p) +{ + const char *q; + + if (skip_prefix(*p, "[PATCH", &q) && (q = strchr(q, ']'))) { + q++; + skip_whitespace(&q); + *p = q; + } +} + +/* + * Parses a value of a mail header, starting at the `offset` of the `line` (the + * offset points to the beginning of the value). + * + * If the value is too long (i.e. if the next line starts with a space to + * indicate a continuation line), the `buf` is used to reconstruct the original, + * long value. + * + * Returns the number of consumed bytes (starting at `line`). + */ +static int parse_header_value(const char *line, int len, int offset, + size_t size, + struct strbuf *buf, const char **value) +{ + const char *orig_line = line; + + if (len >= size || line[len] != ' ') { + *value = line + offset; + return len; + } + + /* handle long header */ + strbuf_reset(buf); + strbuf_addstr(buf, line + offset); + while (len < size && line[len] == ' ') { + line += len; + size -= len; + len = find_next_line(line, size); + strbuf_addstr(buf, line); + } + + *value = buf->buf; + return (line - orig_line) + len; +} + static int read_mbox(const char *path, struct string_list *list) { struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; + struct strbuf author_buf = STRBUF_INIT, subject_buf = STRBUF_INIT; struct patch_util *util = NULL; enum { MBOX_BEFORE_HEADER, @@ -158,6 +211,8 @@ static int read_mbox(const char *path, struct string_list *list) free(util); free(current_filename); string_list_clear(list, 1); + strbuf_release(&author_buf); + strbuf_release(&subject_buf); strbuf_release(&buf); strbuf_release(&contents); return -1; @@ -227,23 +282,14 @@ static int read_mbox(const char *path, struct string_list *list) if (subject) strbuf_addf(&buf, " %s\n\n", subject); } else if (skip_prefix(line, "From: ", &p)) { - while (isspace(*p)) - p++; - author = p; + skip_whitespace(&p); + len = parse_header_value(line, len, p - line, size, + &author_buf, &author); } else if (skip_prefix(line, "Subject: ", &p)) { - const char *q; - - while (isspace(*p)) - p++; - subject = p; - - if (starts_with(p, "[PATCH") && - (q = strchr(p, ']'))) { - q++; - while (isspace(*q)) - q++; - subject = q; - } + skip_whitespace(&p); + skip_patch_prefix(&p); + len = parse_header_value(line, len, p - line, size, + &subject_buf, &subject); } } else if (state == MBOX_IN_COMMIT_MESSAGE) { if (!line[0]) { @@ -289,7 +335,7 @@ static int read_mbox(const char *path, struct string_list *list) util->diffsize++; continue; case '@': - if (parse_hunk_header(line, &old_count, + if (parse_line_range(line, &old_count, &new_count, &p)) break; @@ -320,6 +366,8 @@ static int read_mbox(const char *path, struct string_list *list) else free(util); } + strbuf_release(&author_buf); + strbuf_release(&subject_buf); strbuf_release(&buf); free(current_filename); From 66a7eabef0b4079fce00941b4d3e2aa210eb4d35 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Jun 2023 14:32:32 +0200 Subject: [PATCH 4/5] WIP Signed-off-by: Johannes Schindelin --- range-diff.c | 327 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 202 insertions(+), 125 deletions(-) diff --git a/range-diff.c b/range-diff.c index b3771be20eec82..2c4c8706a433f9 100644 --- a/range-diff.c +++ b/range-diff.c @@ -79,24 +79,49 @@ static int parse_line_range(const char *p, return 0; } +struct parser_state { + struct strbuf buf; + const char *line; + size_t next_line_offset; + enum { + MBOX_BEFORE_HEADER, + MBOX_IN_HEADER, + MBOX_AFTER_HEADERS, + MBOX_IN_COMMIT_MESSAGE, + MBOX_AFTER_TRIPLE_DASH, + MBOX_IN_DIFF + } state; +}; + +static inline int is_last_line(struct parser_state *s) +{ + return s->next_line_offset >= s->buf.len; +} + /* * This function finds the end of the line, replaces the newline character with - * a NUL, and returns the offset of the start of the next line. - * - * If no newline character was found, it returns the offset of the trailing NUL - * instead. + * a NUL, and updates `line`. */ -static inline int find_next_line(const char *line, size_t size) +static inline int next_line(struct parser_state *s) { - char *eol; + char *p; - eol = memchr(line, '\n', size); - if (!eol) - return size; + s->line = s->buf.buf + s->next_line_offset; - *eol = '\0'; + if (s->next_line_offset >= s->buf.len) { + s->line = NULL; + return 0; + } + + for (p = s->buf.buf + s->next_line_offset; *p; p++) + if (*p == '\n') { + *p = '\0'; + p++; + break; + } - return eol + 1 - line; + s->next_line_offset = p - s->buf.buf; + return 1; } static void skip_whitespace(const char **p) @@ -117,73 +142,108 @@ static void skip_patch_prefix(const char **p) } /* - * Parses a value of a mail header, starting at the `offset` of the `line` (the - * offset points to the beginning of the value). + * Parses a value of a mail header, starting at the `value_offset` of the + * current line. * * If the value is too long (i.e. if the next line starts with a space to - * indicate a continuation line), the `buf` is used to reconstruct the original, - * long value. + * indicate a continuation line), the `scratch` buffer is used to reconstruct + * the original, long value. * - * Returns the number of consumed bytes (starting at `line`). + * This function can also be used to merely skip a header (for example, the + * in-body `Date:` header) by passing `NULL` values for the `scratch` and + * `value` parameters). + * + * Returns the number of consumed bytes (starting at `buf`). */ -static int parse_header_value(const char *line, int len, int offset, - size_t size, - struct strbuf *buf, const char **value) +static void parse_header_value(struct parser_state *s, const char *value_start, + struct strbuf *scratch, const char **value) { - const char *orig_line = line; - - if (len >= size || line[len] != ' ') { - *value = line + offset; - return len; + if (s->next_line_offset >= s->buf.len || + s->buf.buf[s->next_line_offset] != ' ') { + if (value) + *value = value_start; + return; } /* handle long header */ - strbuf_reset(buf); - strbuf_addstr(buf, line + offset); - while (len < size && line[len] == ' ') { - line += len; - size -= len; - len = find_next_line(line, size); - strbuf_addstr(buf, line); + if (value) { + strbuf_reset(scratch); + strbuf_addstr(scratch, value_start); + } + while (s->next_line_offset < s->buf.len && + s->buf.buf[s->next_line_offset] == ' ') { + next_line(s); + if (value) + strbuf_addstr(scratch, s->line + 1); } - *value = buf->buf; - return (line - orig_line) + len; + if (value) + *value = scratch->buf; +} + +static int looks_like_a_header(const char *line) +{ + if (*line == ':') + return 0; + while (isalnum(*line) || *line == '-') + line++; + return *line == ':'; +} + +static void parse_headers(struct parser_state *s, + struct strbuf *author_buf, const char **author, + struct strbuf *subject_buf, const char **subject) +{ + const char *p; + + do { + if (skip_prefix(s->line, "From: ", &p)) { + skip_whitespace(&p); + parse_header_value(s, p, author_buf, author); + } else if (skip_prefix(s->line, "Subject: ", &p)) { + skip_whitespace(&p); + skip_patch_prefix(&p); + parse_header_value(s, p, subject_buf, subject); + } else if (skip_prefix(s->line, "Date: ", &p)) { + /* + * The Date: header is not used by `range-diff`, but it + * is potentially part of the in-body headers and should + * therefore be skipped. + * + * It _could_ be a long date string that is broken apart + * across multiple lines, so we'll parse it even if we + * do not use the parsed value. + */ + skip_whitespace(&p); + parse_header_value(s, p, NULL, NULL); + } else if (!*s->line || !looks_like_a_header(s->line)) + break; /* empty line */ + } while (next_line(s)); } static int read_mbox(const char *path, struct string_list *list) { - struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; + struct parser_state s = { + .buf = STRBUF_INIT, + .state = MBOX_BEFORE_HEADER + }; + struct strbuf scratch = STRBUF_INIT; struct strbuf author_buf = STRBUF_INIT, subject_buf = STRBUF_INIT; struct patch_util *util = NULL; - enum { - MBOX_BEFORE_HEADER, - MBOX_IN_HEADER, - MBOX_IN_COMMIT_MESSAGE, - MBOX_AFTER_TRIPLE_DASH, - MBOX_IN_DIFF - } state = MBOX_BEFORE_HEADER; - char *line, *current_filename = NULL; - int len; - size_t size, old_count = 0, new_count = 0; - const char *author = NULL, *subject = NULL; + char *current_filename = NULL; + size_t old_count = 0, new_count = 0; + const char *author = NULL, *subject = NULL, *p; if (!strcmp(path, "-")) { - if (strbuf_read(&contents, STDIN_FILENO, 0) < 0) + if (strbuf_read(&s.buf, STDIN_FILENO, 0) < 0) return error_errno(_("could not read stdin")); - } else if (strbuf_read_file(&contents, path, 0) < 0) + } else if (strbuf_read_file(&s.buf, path, 0) < 0) return error_errno(_("could not read '%s'"), path); - line = contents.buf; - size = contents.len; - for (; size; size -= len, line += len) { - const char *p; - - len = find_next_line(line, size); - - if (state == MBOX_BEFORE_HEADER) { + while (next_line(&s)) { + if (s.state == MBOX_BEFORE_HEADER) { parse_from_delimiter: - if (!skip_prefix(line, "From ", &p)) + if (!skip_prefix(s.line, "From ", &p)) continue; if (util) @@ -194,18 +254,20 @@ static int read_mbox(const char *path, struct string_list *list) util->matching = -1; author = subject = NULL; - state = MBOX_IN_HEADER; + s.state = MBOX_IN_HEADER; continue; } - if (starts_with(line, "diff --git ")) { + if (starts_with(s.line, "diff --git ")) { struct patch patch = { 0 }; struct strbuf root = STRBUF_INIT; - int linenr = 0; + int linenr = 0, offset; int orig_len; + unsigned int size; - if (len == size) { - error(_("incomplete diff header: '%s'"), line); + if (s.next_line_offset >= s.buf.len) { + error(_("incomplete diff header: '%s'"), + s.line); fail: release_patch(&patch); free(util); @@ -213,33 +275,37 @@ static int read_mbox(const char *path, struct string_list *list) string_list_clear(list, 1); strbuf_release(&author_buf); strbuf_release(&subject_buf); - strbuf_release(&buf); - strbuf_release(&contents); + strbuf_release(&scratch); + strbuf_release(&s.buf); return -1; } - if (line[len - 2] == '\r') { + orig_len = strlen(s.line); + if (s.line[orig_len - 1] == '\r') { error(_("cannot handle diff headers with " "CR/LF line endings")); goto fail; } - state = MBOX_IN_DIFF; + s.state = MBOX_IN_DIFF; old_count = new_count = 0; - strbuf_addch(&buf, '\n'); + strbuf_addch(&scratch, '\n'); if (!util->diff_offset) - util->diff_offset = buf.len; - - orig_len = len; - /* `find_next_line()`'s replaced the LF with a NUL */ - line[len - 1] = '\n'; - len = parse_git_diff_header(&root, &linenr, 1, line, - len, size, &patch); - if (len < 0) { + util->diff_offset = scratch.len; + + /* `next_line()`'s replaced the LF with a NUL */ + s.buf.buf[s.next_line_offset - 1] = '\n'; + s.next_line_offset -= orig_len + 1; + size = s.buf.len - s.next_line_offset; + offset = parse_git_diff_header(&root, &linenr, 1, + s.line, orig_len + 1, + size, &patch); + if (offset < 0) { error(_("could not parse git header '%.*s'"), - orig_len, line); + orig_len, s.line); goto fail; } + s.next_line_offset += offset; if (patch.old_name) skip_prefix(patch.old_name, "a/", @@ -248,15 +314,15 @@ static int read_mbox(const char *path, struct string_list *list) skip_prefix(patch.new_name, "b/", (const char **)&patch.new_name); - strbuf_addstr(&buf, " ## "); + strbuf_addstr(&scratch, " ## "); if (patch.is_new) - strbuf_addf(&buf, "%s (new)", patch.new_name); + strbuf_addf(&scratch, "%s (new)", patch.new_name); else if (patch.is_delete) - strbuf_addf(&buf, "%s (deleted)", patch.old_name); + strbuf_addf(&scratch, "%s (deleted)", patch.old_name); else if (patch.is_rename) - strbuf_addf(&buf, "%s => %s", patch.old_name, patch.new_name); + strbuf_addf(&scratch, "%s => %s", patch.old_name, patch.new_name); else - strbuf_addstr(&buf, patch.new_name); + strbuf_addstr(&scratch, patch.new_name); free(current_filename); if (patch.is_delete) @@ -266,52 +332,63 @@ static int read_mbox(const char *path, struct string_list *list) if (patch.new_mode && patch.old_mode && patch.old_mode != patch.new_mode) - strbuf_addf(&buf, " (mode change %06o => %06o)", + strbuf_addf(&scratch, " (mode change %06o => %06o)", patch.old_mode, patch.new_mode); - strbuf_addstr(&buf, " ##\n"); + strbuf_addstr(&scratch, " ##\n"); util->diffsize++; release_patch(&patch); - } else if (state == MBOX_IN_HEADER) { - if (!line[0]) { - state = MBOX_IN_COMMIT_MESSAGE; - strbuf_addstr(&buf, " ## Metadata ##\n"); + } else if (s.state == MBOX_IN_HEADER) { + parse_headers(&s, + &author_buf, &author, + &subject_buf, &subject); + if (!*s.line) { + /* parse in-body headers, if any */ + if (!is_last_line(&s) && + (starts_with(s.line + 1, "From: ") || + starts_with(s.line + 1, "Subject: ") || + starts_with(s.line + 1, "Date: "))) { + next_line(&s); + parse_headers(&s, + &author_buf, &author, + &subject_buf, &subject); + } + s.state = MBOX_IN_COMMIT_MESSAGE; + + strbuf_addstr(&scratch, " ## Metadata ##\n"); if (author) - strbuf_addf(&buf, "Author: %s\n", author); - strbuf_addstr(&buf, "\n ## Commit message ##\n"); + strbuf_addf(&scratch, "Author: %s\n", + author); + strbuf_addstr(&scratch, + "\n ## Commit message ##\n"); if (subject) - strbuf_addf(&buf, " %s\n\n", subject); - } else if (skip_prefix(line, "From: ", &p)) { - skip_whitespace(&p); - len = parse_header_value(line, len, p - line, size, - &author_buf, &author); - } else if (skip_prefix(line, "Subject: ", &p)) { - skip_whitespace(&p); - skip_patch_prefix(&p); - len = parse_header_value(line, len, p - line, size, - &subject_buf, &subject); + strbuf_addf(&scratch, " %s\n\n", + subject); + if (*s.line) + goto in_commit_message; } - } else if (state == MBOX_IN_COMMIT_MESSAGE) { - if (!line[0]) { - strbuf_addch(&buf, '\n'); - } else if (strcmp(line, "---")) { + } else if (s.state == MBOX_IN_COMMIT_MESSAGE) { +in_commit_message: + if (!*s.line) { + strbuf_addch(&scratch, '\n'); + } else if (strcmp(s.line, "---")) { int tabs = 0; /* simulate tab expansion */ - while (line[tabs] == '\t') + while (s.line[tabs] == '\t') tabs++; - strbuf_addf(&buf, "%*s%s\n", - 4 + 8 * tabs, "", line + tabs); + strbuf_addf(&scratch, "%*s%s\n", + 4 + 8 * tabs, "", s.line + tabs); } else { /* * Trim the trailing newline that is added * by `format-patch`. */ - strbuf_trim_trailing_newline(&buf); - state = MBOX_AFTER_TRIPLE_DASH; + // strbuf_trim_trailing_newline(&scratch); + s.state = MBOX_AFTER_TRIPLE_DASH; } - } else if (state == MBOX_IN_DIFF) { - switch (line[0]) { + } else if (s.state == MBOX_IN_DIFF) { + switch (s.line[0]) { case '\0': /* diff might be followed by empty lines */ if (old_count || new_count) @@ -324,51 +401,51 @@ static int read_mbox(const char *path, struct string_list *list) /* A `-- ` line indicates the end of a diff */ if (!old_count && !new_count) break; - if (old_count && line[0] != '+') + if (old_count && s.line[0] != '+') old_count--; - if (new_count && line[0] != '-') + if (new_count && s.line[0] != '-') new_count--; /* fallthrough */ case '\\': - strbuf_addstr(&buf, line); - strbuf_addch(&buf, '\n'); + strbuf_addstr(&scratch, s.line); + strbuf_addch(&scratch, '\n'); util->diffsize++; continue; case '@': - if (parse_line_range(line, &old_count, + if (parse_line_range(s.line, &old_count, &new_count, &p)) break; - strbuf_addstr(&buf, "@@"); + strbuf_addstr(&scratch, "@@"); if (current_filename && *p) - strbuf_addf(&buf, " %s:", + strbuf_addf(&scratch, " %s:", current_filename); - strbuf_addstr(&buf, p); - strbuf_addch(&buf, '\n'); + strbuf_addstr(&scratch, p); + strbuf_addch(&scratch, '\n'); util->diffsize++; continue; } if (util) { - string_list_append(list, buf.buf)->util = util; + string_list_append(list, scratch.buf)->util = util; util = NULL; - strbuf_reset(&buf); + strbuf_reset(&scratch); } - state = MBOX_BEFORE_HEADER; + s.state = MBOX_BEFORE_HEADER; goto parse_from_delimiter; } } - strbuf_release(&contents); + strbuf_release(&s.buf); if (util) { - if (state == MBOX_IN_DIFF) - string_list_append(list, buf.buf)->util = util; + if (s.state == MBOX_IN_DIFF) + string_list_append(list, scratch.buf)->util = util; else free(util); } strbuf_release(&author_buf); strbuf_release(&subject_buf); - strbuf_release(&buf); + strbuf_release(&scratch); free(current_filename); return 0; From 77c82f618d2f002c3e70a37d86ae22274db92cb8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 29 Aug 2023 09:47:59 +0200 Subject: [PATCH 5/5] WIP Need to add a test for a bare diff (i.e. without commit message), to allow things like: git range-diff $COMMIT^! mbox:<(git diff HEAD) Signed-off-by: Johannes Schindelin --- range-diff.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/range-diff.c b/range-diff.c index 2c4c8706a433f9..a235423979e59d 100644 --- a/range-diff.c +++ b/range-diff.c @@ -242,6 +242,15 @@ static int read_mbox(const char *path, struct string_list *list) while (next_line(&s)) { if (s.state == MBOX_BEFORE_HEADER) { + if (starts_with(s.line, "diff --git ")) { + /* This is a patch without commit message */ + util = xcalloc(1, sizeof(*util)); + oidcpy(&util->oid, null_oid()); + util->matching = -1; + author = subject = "(none)"; + goto process_diff; + } + parse_from_delimiter: if (!skip_prefix(s.line, "From ", &p)) continue; @@ -258,6 +267,7 @@ static int read_mbox(const char *path, struct string_list *list) continue; } +process_diff: if (starts_with(s.line, "diff --git ")) { struct patch patch = { 0 }; struct strbuf root = STRBUF_INIT;