Cleanup xdfile_t and xrecord_t in xdiff. #2048

gitgitgadget-git · 2025-09-09T09:23:28Z

On the Git mailing list, Elijah Newren wrote (reply to this):

On Sun, Sep 7, 2025 at 12:46 PM Ezekiel Newren via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > When 0 <= i < xdfile_t.nreff the following is true: > xdfile_t.ha[i] == xdfile_t.recs[xdfile_t.rindex[i]] I like getting rid of redundant stuff. One thing to note here is that you're replacing a single indirection with two... > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 24 ++++++++++++++---------- > xdiff/xprepare.c | 12 ++---------- > xdiff/xtypes.h | 1 - > 3 files changed, 16 insertions(+), 21 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index bbf0161f84..11cd090b53 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -22,6 +22,11 @@ > > #include "xinclude.h" > > +static unsigned long get_hash(xdfile_t *xdf, long index) > +{ > + return xdf->recs[xdf->rindex[index]]->ha; > +} > + > #define XDL_MAX_COST_MIN 256 > #define XDL_HEUR_MIN_COST 256 > #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) > @@ -42,8 +47,8 @@ typedef struct s_xdpsplit { > * using this algorithm, so a little bit of heuristic is needed to cut the > * search and to return a suboptimal point. > */ > -static long xdl_split(unsigned long const *ha1, long off1, long lim1, > - unsigned long const *ha2, long off2, long lim2, > +static long xdl_split(xdfile_t *xdf1, long off1, long lim1, > + xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, > xdalgoenv_t *xenv) { > long dmin = off1 - lim2, dmax = lim1 - off2; > @@ -87,7 +92,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > i1 = kvdf[d + 1]; > prev1 = i1; > i2 = i1 - d; > - for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++); > + for (; i1 < lim1 && i2 < lim2 && get_hash(xdf1, i1) == get_hash(xdf2, i2); i1++, i2++); You're not going to be happy with me asking, so sorry in advance, but I'm really curious...we are now replacing a single indirection with a double-indirection inside a for loop, which is nested within two other for loops. Three levels of for-loops to me suggests it might be a hot codepath. Does this double indirection in these codepaths affect performance? > if (i1 - prev1 > xenv->snake_cnt) > got_snake = 1; > kvdf[d] = i1; > @@ -124,7 +129,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > i1 = kvdb[d + 1] - 1; > prev1 = i1; > i2 = i1 - d; > - for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--); > + for (; i1 > off1 && i2 > off2 && get_hash(xdf1, i1 - 1) == get_hash(xdf2, i2 - 1); i1--, i2--); > if (prev1 - i1 > xenv->snake_cnt) > got_snake = 1; > kvdb[d] = i1; > @@ -159,7 +164,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > if (v > XDL_K_HEUR * ec && v > best && > off1 + xenv->snake_cnt <= i1 && i1 < lim1 && > off2 + xenv->snake_cnt <= i2 && i2 < lim2) { > - for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++) > + for (k = 1; get_hash(xdf1, i1 - k) == get_hash(xdf2, i2 - k); k++) > if (k == xenv->snake_cnt) { > best = v; > spl->i1 = i1; > @@ -183,7 +188,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > if (v > XDL_K_HEUR * ec && v > best && > off1 < i1 && i1 <= lim1 - xenv->snake_cnt && > off2 < i2 && i2 <= lim2 - xenv->snake_cnt) { > - for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++) > + for (k = 0; get_hash(xdf1, i1 + k) == get_hash(xdf2, i2 + k); k++) > if (k == xenv->snake_cnt - 1) { > best = v; > spl->i1 = i1; > @@ -260,13 +265,12 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { > - unsigned long const *ha1 = xdf1->ha, *ha2 = xdf2->ha; > > /* > * Shrink the box by walking through each diagonal snake (SW and NE). > */ > - for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); > - for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); > + for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, off1) == get_hash(xdf2, off2); off1++, off2++); > + for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, lim1 - 1) == get_hash(xdf2, lim2 - 1); lim1--, lim2--); > > /* > * If one dimension is empty, then all records on the other one must > @@ -285,7 +289,7 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > /* > * Divide ... > */ > - if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, > + if (xdl_split(xdf1, off1, lim1, xdf2, off2, lim2, kvdf, kvdb, > need_min, &spl, xenv) < 0) { > > return -1; > diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c > index 91b0ed54e0..59730989a3 100644 > --- a/xdiff/xprepare.c > +++ b/xdiff/xprepare.c > @@ -134,7 +134,6 @@ static void xdl_free_ctx(xdfile_t *xdf) > > xdl_free(xdf->rindex); > xdl_free(xdf->rchg - 1); > - xdl_free(xdf->ha); > xdl_free(xdf->recs); > xdl_cha_free(&xdf->rcha); > } > @@ -147,7 +146,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > char const *blk, *cur, *top, *prev; > xrecord_t *crec; > > - xdf->ha = NULL; > xdf->rindex = NULL; > xdf->rchg = NULL; > xdf->recs = NULL; > @@ -182,8 +180,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { > if (!XDL_ALLOC_ARRAY(xdf->rindex, xdf->nrec + 1)) > goto abort; > - if (!XDL_ALLOC_ARRAY(xdf->ha, xdf->nrec + 1)) > - goto abort; > } > > xdf->rchg += 1; > @@ -301,9 +297,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > i <= xdf1->dend; i++, recs++) { > if (dis1[i] == 1 || > (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { > - xdf1->rindex[nreff] = i; > - xdf1->ha[nreff] = (*recs)->ha; > - nreff++; > + xdf1->rindex[nreff++] = i; > } else > xdf1->rchg[i] = 1; > } > @@ -313,9 +307,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > i <= xdf2->dend; i++, recs++) { > if (dis2[i] == 1 || > (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { > - xdf2->rindex[nreff] = i; > - xdf2->ha[nreff] = (*recs)->ha; > - nreff++; > + xdf2->rindex[nreff++] = i; > } else > xdf2->rchg[i] = 1; > } > diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h > index 8b8467360e..85848f1685 100644 > --- a/xdiff/xtypes.h > +++ b/xdiff/xtypes.h > @@ -52,7 +52,6 @@ typedef struct s_xdfile { > char *rchg; > long *rindex; > long nreff; > - unsigned long *ha; > } xdfile_t; > > typedef struct s_xdfenv { > -- > gitgitgadget Other than the performance question, it looks like you've made a straightforward mechanical change as highlighted in the commit message.

gitgitgadget-git · 2025-09-09T09:25:53Z

On the Git mailing list, Elijah Newren wrote (reply to this):

On Sun, Sep 7, 2025 at 12:46 PM Ezekiel Newren via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> My personal bias is that things like "view with --color-words" makes more sense to include near the end of the commit message, just before the sign-offs. Not sure if others agree on that. > The chastore_t type is very unfriendly to Rust FFI. It's also redundant > since 'recs' is a vector type that grows every time an xrecord_t is > added. The second sentence seems to presume the reader knows what chastore_t type is for, and about the confusing dual layering between it and recs.its confusing dual layering. I liked your more extended explanation in https://lore.kernel.org/git/7ea2dccd71fc502f20614ce217fc9885d1b17413.1756496539.git.gitgitgadget@gmail.com/; could some of that be used here? > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 24 ++++++++++---------- > xdiff/xemit.c | 6 ++--- > xdiff/xhistogram.c | 2 +- > xdiff/xmerge.c | 56 +++++++++++++++++++++++----------------------- > xdiff/xpatience.c | 10 ++++----- > xdiff/xprepare.c | 19 ++++++---------- > xdiff/xtypes.h | 3 +-- > xdiff/xutils.c | 12 +++++----- > 8 files changed, 63 insertions(+), 69 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 11cd090b53..a66125d44a 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -24,7 +24,7 @@ > > static unsigned long get_hash(xdfile_t *xdf, long index) > { > - return xdf->recs[xdf->rindex[index]]->ha; > + return xdf->recs[xdf->rindex[index]].ha; > } > > #define XDL_MAX_COST_MIN 256 > @@ -489,13 +489,13 @@ static void measure_split(const xdfile_t *xdf, long split, > m->indent = -1; > } else { > m->end_of_file = 0; > - m->indent = get_indent(xdf->recs[split]); > + m->indent = get_indent(&xdf->recs[split]); > } > > m->pre_blank = 0; > m->pre_indent = -1; > for (i = split - 1; i >= 0; i--) { > - m->pre_indent = get_indent(xdf->recs[i]); > + m->pre_indent = get_indent(&xdf->recs[i]); > if (m->pre_indent != -1) > break; > m->pre_blank += 1; > @@ -508,7 +508,7 @@ static void measure_split(const xdfile_t *xdf, long split, > m->post_blank = 0; > m->post_indent = -1; > for (i = split + 1; i < xdf->nrec; i++) { > - m->post_indent = get_indent(xdf->recs[i]); > + m->post_indent = get_indent(&xdf->recs[i]); > if (m->post_indent != -1) > break; > m->post_blank += 1; > @@ -752,7 +752,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) > static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->end < xdf->nrec && > - recs_match(xdf->recs[g->start], xdf->recs[g->end])) { > + recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { > xdf->rchg[g->start++] = 0; > xdf->rchg[g->end++] = 1; > > @@ -773,7 +773,7 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) > static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->start > 0 && > - recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) { > + recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) { > xdf->rchg[--g->start] = 1; > xdf->rchg[--g->end] = 0; > > @@ -988,16 +988,16 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) > > for (xch = xscr; xch; xch = xch->next) { > int ignore = 1; > - xrecord_t **rec; > + xrecord_t *rec; > long i; > > rec = &xe->xdf1.recs[xch->i1]; > for (i = 0; i < xch->chg1 && ignore; i++) > - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); > + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); > > rec = &xe->xdf2.recs[xch->i2]; > for (i = 0; i < xch->chg2 && ignore; i++) > - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); > + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); > > xch->ignore = ignore; > } > @@ -1021,7 +1021,7 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe, > xdchange_t *xch; > > for (xch = xscr; xch; xch = xch->next) { > - xrecord_t **rec; > + xrecord_t *rec; > int ignore = 1; > long i; > > @@ -1033,11 +1033,11 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe, > > rec = &xe->xdf1.recs[xch->i1]; > for (i = 0; i < xch->chg1 && ignore; i++) > - ignore = record_matches_regex(rec[i], xpp); > + ignore = record_matches_regex(&rec[i], xpp); > > rec = &xe->xdf2.recs[xch->i2]; > for (i = 0; i < xch->chg2 && ignore; i++) > - ignore = record_matches_regex(rec[i], xpp); > + ignore = record_matches_regex(&rec[i], xpp); > > xch->ignore = ignore; > } > diff --git a/xdiff/xemit.c b/xdiff/xemit.c > index 2161ac3cd0..b2f1f30cd3 100644 > --- a/xdiff/xemit.c > +++ b/xdiff/xemit.c > @@ -25,7 +25,7 @@ > > static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) > { > - xrecord_t *rec = xdf->recs[ri]; > + xrecord_t *rec = &xdf->recs[ri]; > > if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) > return -1; > @@ -110,7 +110,7 @@ static long def_ff(const char *rec, long len, char *buf, long sz) > static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, > char *buf, long sz) > { > - xrecord_t *rec = xdf->recs[ri]; > + xrecord_t *rec = &xdf->recs[ri]; > > if (!xecfg->find_func) > return def_ff(rec->ptr, rec->size, buf, sz); > @@ -150,7 +150,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, > > static int is_empty_rec(xdfile_t *xdf, long ri) > { > - xrecord_t *rec = xdf->recs[ri]; > + xrecord_t *rec = &xdf->recs[ri]; > long i = 0; > > for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); > diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c > index 040d81e0bc..4d857e8ae2 100644 > --- a/xdiff/xhistogram.c > +++ b/xdiff/xhistogram.c > @@ -86,7 +86,7 @@ struct region { > ((LINE_MAP(index, ptr))->cnt) > > #define REC(env, s, l) \ > - (env->xdf##s.recs[l - 1]) > + (&env->xdf##s.recs[l - 1]) > > static int cmp_recs(xrecord_t *r1, xrecord_t *r2) > { > diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c > index af40c88a5b..fd600cbb5d 100644 > --- a/xdiff/xmerge.c > +++ b/xdiff/xmerge.c > @@ -97,12 +97,12 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, > int line_count, long flags) > { > int i; > - xrecord_t **rec1 = xe1->xdf2.recs + i1; > - xrecord_t **rec2 = xe2->xdf2.recs + i2; > + xrecord_t *rec1 = xe1->xdf2.recs + i1; > + xrecord_t *rec2 = xe2->xdf2.recs + i2; > > for (i = 0; i < line_count; i++) { > - int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size, > - rec2[i]->ptr, rec2[i]->size, flags); > + int result = xdl_recmatch(rec1[i].ptr, rec1[i].size, > + rec2[i].ptr, rec2[i].size, flags); > if (!result) > return -1; > } > @@ -111,7 +111,7 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, > > static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) > { > - xrecord_t **recs; > + xrecord_t *recs; > int size = 0; > > recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; > @@ -119,12 +119,12 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee > if (count < 1) > return 0; > > - for (i = 0; i < count; size += recs[i++]->size) > + for (i = 0; i < count; size += recs[i++].size) > if (dest) > - memcpy(dest + size, recs[i]->ptr, recs[i]->size); > + memcpy(dest + size, recs[i].ptr, recs[i].size); > if (add_nl) { > - i = recs[count - 1]->size; > - if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { > + i = recs[count - 1].size; > + if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') { > if (needs_cr) { > if (dest) > dest[size] = '\r'; > @@ -160,22 +160,22 @@ static int is_eol_crlf(xdfile_t *file, int i) > > if (i < file->nrec - 1) > /* All lines before the last *must* end in LF */ > - return (size = file->recs[i]->size) > 1 && > - file->recs[i]->ptr[size - 2] == '\r'; > + return (size = file->recs[i].size) > 1 && > + file->recs[i].ptr[size - 2] == '\r'; > if (!file->nrec) > /* Cannot determine eol style from empty file */ > return -1; > - if ((size = file->recs[i]->size) && > - file->recs[i]->ptr[size - 1] == '\n') > + if ((size = file->recs[i].size) && > + file->recs[i].ptr[size - 1] == '\n') > /* Last line; ends in LF; Is it CR/LF? */ > return size > 1 && > - file->recs[i]->ptr[size - 2] == '\r'; > + file->recs[i].ptr[size - 2] == '\r'; > if (!i) > /* The only line has no eol */ > return -1; > /* Determine eol from second-to-last line */ > - return (size = file->recs[i - 1]->size) > 1 && > - file->recs[i - 1]->ptr[size - 2] == '\r'; > + return (size = file->recs[i - 1].size) > 1 && > + file->recs[i - 1].ptr[size - 2] == '\r'; > } > > static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m) > @@ -334,22 +334,22 @@ static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags) > static void xdl_refine_zdiff3_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, > xpparam_t const *xpp) > { > - xrecord_t **rec1 = xe1->xdf2.recs, **rec2 = xe2->xdf2.recs; > + xrecord_t *rec1 = xe1->xdf2.recs, *rec2 = xe2->xdf2.recs; > for (; m; m = m->next) { > /* let's handle just the conflicts */ > if (m->mode) > continue; > > while(m->chg1 && m->chg2 && > - recmatch(rec1[m->i1], rec2[m->i2], xpp->flags)) { > + recmatch(&rec1[m->i1], &rec2[m->i2], xpp->flags)) { > m->chg1--; > m->chg2--; > m->i1++; > m->i2++; > } > while (m->chg1 && m->chg2 && > - recmatch(rec1[m->i1 + m->chg1 - 1], > - rec2[m->i2 + m->chg2 - 1], xpp->flags)) { > + recmatch(&rec1[m->i1 + m->chg1 - 1], > + &rec2[m->i2 + m->chg2 - 1], xpp->flags)) { > m->chg1--; > m->chg2--; > } > @@ -381,12 +381,12 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, > * This probably does not work outside git, since > * we have a very simple mmfile structure. > */ > - t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr; > - t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr > - + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr; > - t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr; > - t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr > - + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr; > + t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr; > + t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr > + + xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr; > + t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr; > + t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr > + + xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr; > if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) > return -1; > if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || > @@ -440,8 +440,8 @@ static int line_contains_alnum(const char *ptr, long size) > static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) > { > for (; chg; chg--, i++) > - if (line_contains_alnum(xe->xdf2.recs[i]->ptr, > - xe->xdf2.recs[i]->size)) > + if (line_contains_alnum(xe->xdf2.recs[i].ptr, > + xe->xdf2.recs[i].size)) > return 1; > return 0; > } > diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c > index 77dc411d19..bf69a58527 100644 > --- a/xdiff/xpatience.c > +++ b/xdiff/xpatience.c > @@ -88,9 +88,9 @@ static int is_anchor(xpparam_t const *xpp, const char *line) > static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, > int pass) > { > - xrecord_t **records = pass == 1 ? > + xrecord_t *records = pass == 1 ? > map->env->xdf1.recs : map->env->xdf2.recs; > - xrecord_t *record = records[line - 1]; > + xrecord_t *record = &records[line - 1]; > /* > * After xdl_prepare_env() (or more precisely, due to > * xdl_classify_record()), the "ha" member of the records (AKA lines) > @@ -121,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, > return; > map->entries[index].line1 = line; > map->entries[index].hash = record->ha; > - map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr); > + map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); > if (!map->first) > map->first = map->entries + index; > if (map->last) { > @@ -246,8 +246,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) > > static int match(struct hashmap *map, int line1, int line2) > { > - xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; > - xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; > + xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; > + xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; > return record1->ha == record2->ha; > } > > diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c > index 6f1d4b4725..92f9845003 100644 > --- a/xdiff/xprepare.c > +++ b/xdiff/xprepare.c > @@ -131,7 +131,6 @@ static void xdl_free_ctx(xdfile_t *xdf) > xdl_free(xdf->rindex); > xdl_free(xdf->rchg - 1); > xdl_free(xdf->recs); > - xdl_cha_free(&xdf->rcha); > } > > > @@ -146,8 +145,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > xdf->rchg = NULL; > xdf->recs = NULL; > > - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) > - goto abort; > if (!XDL_ALLOC_ARRAY(xdf->recs, narec)) > goto abort; > > @@ -158,12 +155,10 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > hav = xdl_hash_record(&cur, top, xpp->flags); > if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) > goto abort; > - if (!(crec = xdl_cha_alloc(&xdf->rcha))) > - goto abort; > + crec = &xdf->recs[xdf->nrec++]; > crec->ptr = prev; > crec->size = (long) (cur - prev); > crec->ha = hav; > - xdf->recs[xdf->nrec++] = crec; > if (xdl_classify_record(pass, cf, crec) < 0) > goto abort; > } > @@ -263,7 +258,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { > */ > static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { > long i, nm, nreff, mlim; > - xrecord_t **recs; > + xrecord_t *recs; > xdlclass_t *rcrec; > char *dis, *dis1, *dis2; > int need_min = !!(cf->flags & XDF_NEED_MINIMAL); > @@ -276,7 +271,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) > mlim = XDL_MAX_EQLIMIT; > for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { > - rcrec = cf->rcrecs[(*recs)->ha]; > + rcrec = cf->rcrecs[recs->ha]; > nm = rcrec ? rcrec->len2 : 0; > dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; > } > @@ -284,7 +279,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) > mlim = XDL_MAX_EQLIMIT; > for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { > - rcrec = cf->rcrecs[(*recs)->ha]; > + rcrec = cf->rcrecs[recs->ha]; > nm = rcrec ? rcrec->len1 : 0; > dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; > } > @@ -320,13 +315,13 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > */ > static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { > long i, lim; > - xrecord_t **recs1, **recs2; > + xrecord_t *recs1, *recs2; > > recs1 = xdf1->recs; > recs2 = xdf2->recs; > for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; > i++, recs1++, recs2++) > - if ((*recs1)->ha != (*recs2)->ha) > + if (recs1->ha != recs2->ha) > break; > > xdf1->dstart = xdf2->dstart = i; > @@ -334,7 +329,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { > recs1 = xdf1->recs + xdf1->nrec - 1; > recs2 = xdf2->recs + xdf2->nrec - 1; > for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) > - if ((*recs1)->ha != (*recs2)->ha) > + if (recs1->ha != recs2->ha) > break; > > xdf1->dend = xdf1->nrec - i - 1; > diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h > index 85848f1685..3d26cbf1ec 100644 > --- a/xdiff/xtypes.h > +++ b/xdiff/xtypes.h > @@ -45,10 +45,9 @@ typedef struct s_xrecord { > } xrecord_t; > > typedef struct s_xdfile { > - chastore_t rcha; > + xrecord_t *recs; > long nrec; > long dstart, dend; > - xrecord_t **recs; > char *rchg; > long *rindex; > long nreff; > diff --git a/xdiff/xutils.c b/xdiff/xutils.c > index 444a108f87..332982b509 100644 > --- a/xdiff/xutils.c > +++ b/xdiff/xutils.c > @@ -416,12 +416,12 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, > mmfile_t subfile1, subfile2; > xdfenv_t env; > > - subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; > - subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + > - diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; > - subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; > - subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + > - diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; > + subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr; > + subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr + > + diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr; > + subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr; > + subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr + > + diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr; > if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) > return -1; > > -- > gitgitgadget You weren't kidding with the --color-words callout; there's an awful lot of places where you only change one or two characters (e.g. '->' becoming '.'); that's much easier to see when viewing the diff with that flag. Anyway, looks good.

On the Git mailing list, Phillip Wood wrote (reply to this):

On 09/09/2025 09:58, Elijah Newren wrote: > On Sun, Sep 7, 2025 at 12:46 PM Ezekiel Newren via GitGitGadget > <gitgitgadget@gmail.com> wrote: >> >> From: Ezekiel Newren <ezekielnewren@gmail.com> >> The chastore_t type is very unfriendly to Rust FFI. It's also redundant >> since 'recs' is a vector type that grows every time an xrecord_t is >> added. > > The second sentence seems to presume the reader knows what chastore_t > type is for, and about the confusing dual layering between it and > recs.its confusing dual layering. I liked your more extended > explanation in https://lore.kernel.org/git/7ea2dccd71fc502f20614ce217fc9885d1b17413.1756496539.git.gitgitgadget@gmail.com/; > could some of that be used here? I agree that's a better explaination. I also think it would be helpful to spell out the implications of this change. If I understand the change correctly we now store all the records in a contiguous array, rather than having the records in a arena and storing a separate array of pointers to those records. As sizeof(xrecord_t) is pretty small the change to contiguous storage hopefully wont cause any allocation issues, though I guess it does mean we end up copying more data as we grow the array compared to using an arena. Overall these first few patches look like a really nice cleanup. Thanks Phillip >> >> Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> >> --- >> xdiff/xdiffi.c | 24 ++++++++++---------- >> xdiff/xemit.c | 6 ++--- >> xdiff/xhistogram.c | 2 +- >> xdiff/xmerge.c | 56 +++++++++++++++++++++++----------------------- >> xdiff/xpatience.c | 10 ++++----- >> xdiff/xprepare.c | 19 ++++++---------- >> xdiff/xtypes.h | 3 +-- >> xdiff/xutils.c | 12 +++++----- >> 8 files changed, 63 insertions(+), 69 deletions(-) >> >> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c >> index 11cd090b53..a66125d44a 100644 >> --- a/xdiff/xdiffi.c >> +++ b/xdiff/xdiffi.c >> @@ -24,7 +24,7 @@ >> >> static unsigned long get_hash(xdfile_t *xdf, long index) >> { >> - return xdf->recs[xdf->rindex[index]]->ha; >> + return xdf->recs[xdf->rindex[index]].ha; >> } >> >> #define XDL_MAX_COST_MIN 256 >> @@ -489,13 +489,13 @@ static void measure_split(const xdfile_t *xdf, long split, >> m->indent = -1; >> } else { >> m->end_of_file = 0; >> - m->indent = get_indent(xdf->recs[split]); >> + m->indent = get_indent(&xdf->recs[split]); >> } >> >> m->pre_blank = 0; >> m->pre_indent = -1; >> for (i = split - 1; i >= 0; i--) { >> - m->pre_indent = get_indent(xdf->recs[i]); >> + m->pre_indent = get_indent(&xdf->recs[i]); >> if (m->pre_indent != -1) >> break; >> m->pre_blank += 1; >> @@ -508,7 +508,7 @@ static void measure_split(const xdfile_t *xdf, long split, >> m->post_blank = 0; >> m->post_indent = -1; >> for (i = split + 1; i < xdf->nrec; i++) { >> - m->post_indent = get_indent(xdf->recs[i]); >> + m->post_indent = get_indent(&xdf->recs[i]); >> if (m->post_indent != -1) >> break; >> m->post_blank += 1; >> @@ -752,7 +752,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) >> static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) >> { >> if (g->end < xdf->nrec && >> - recs_match(xdf->recs[g->start], xdf->recs[g->end])) { >> + recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { >> xdf->rchg[g->start++] = 0; >> xdf->rchg[g->end++] = 1; >> >> @@ -773,7 +773,7 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) >> static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) >> { >> if (g->start > 0 && >> - recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) { >> + recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) { >> xdf->rchg[--g->start] = 1; >> xdf->rchg[--g->end] = 0; >> >> @@ -988,16 +988,16 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) >> >> for (xch = xscr; xch; xch = xch->next) { >> int ignore = 1; >> - xrecord_t **rec; >> + xrecord_t *rec; >> long i; >> >> rec = &xe->xdf1.recs[xch->i1]; >> for (i = 0; i < xch->chg1 && ignore; i++) >> - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); >> + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); >> >> rec = &xe->xdf2.recs[xch->i2]; >> for (i = 0; i < xch->chg2 && ignore; i++) >> - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); >> + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); >> >> xch->ignore = ignore; >> } >> @@ -1021,7 +1021,7 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe, >> xdchange_t *xch; >> >> for (xch = xscr; xch; xch = xch->next) { >> - xrecord_t **rec; >> + xrecord_t *rec; >> int ignore = 1; >> long i; >> >> @@ -1033,11 +1033,11 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe, >> >> rec = &xe->xdf1.recs[xch->i1]; >> for (i = 0; i < xch->chg1 && ignore; i++) >> - ignore = record_matches_regex(rec[i], xpp); >> + ignore = record_matches_regex(&rec[i], xpp); >> >> rec = &xe->xdf2.recs[xch->i2]; >> for (i = 0; i < xch->chg2 && ignore; i++) >> - ignore = record_matches_regex(rec[i], xpp); >> + ignore = record_matches_regex(&rec[i], xpp); >> >> xch->ignore = ignore; >> } >> diff --git a/xdiff/xemit.c b/xdiff/xemit.c >> index 2161ac3cd0..b2f1f30cd3 100644 >> --- a/xdiff/xemit.c >> +++ b/xdiff/xemit.c >> @@ -25,7 +25,7 @@ >> >> static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) >> { >> - xrecord_t *rec = xdf->recs[ri]; >> + xrecord_t *rec = &xdf->recs[ri]; >> >> if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) >> return -1; >> @@ -110,7 +110,7 @@ static long def_ff(const char *rec, long len, char *buf, long sz) >> static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, >> char *buf, long sz) >> { >> - xrecord_t *rec = xdf->recs[ri]; >> + xrecord_t *rec = &xdf->recs[ri]; >> >> if (!xecfg->find_func) >> return def_ff(rec->ptr, rec->size, buf, sz); >> @@ -150,7 +150,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, >> >> static int is_empty_rec(xdfile_t *xdf, long ri) >> { >> - xrecord_t *rec = xdf->recs[ri]; >> + xrecord_t *rec = &xdf->recs[ri]; >> long i = 0; >> >> for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); >> diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c >> index 040d81e0bc..4d857e8ae2 100644 >> --- a/xdiff/xhistogram.c >> +++ b/xdiff/xhistogram.c >> @@ -86,7 +86,7 @@ struct region { >> ((LINE_MAP(index, ptr))->cnt) >> >> #define REC(env, s, l) \ >> - (env->xdf##s.recs[l - 1]) >> + (&env->xdf##s.recs[l - 1]) >> >> static int cmp_recs(xrecord_t *r1, xrecord_t *r2) >> { >> diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c >> index af40c88a5b..fd600cbb5d 100644 >> --- a/xdiff/xmerge.c >> +++ b/xdiff/xmerge.c >> @@ -97,12 +97,12 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, >> int line_count, long flags) >> { >> int i; >> - xrecord_t **rec1 = xe1->xdf2.recs + i1; >> - xrecord_t **rec2 = xe2->xdf2.recs + i2; >> + xrecord_t *rec1 = xe1->xdf2.recs + i1; >> + xrecord_t *rec2 = xe2->xdf2.recs + i2; >> >> for (i = 0; i < line_count; i++) { >> - int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size, >> - rec2[i]->ptr, rec2[i]->size, flags); >> + int result = xdl_recmatch(rec1[i].ptr, rec1[i].size, >> + rec2[i].ptr, rec2[i].size, flags); >> if (!result) >> return -1; >> } >> @@ -111,7 +111,7 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, >> >> static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) >> { >> - xrecord_t **recs; >> + xrecord_t *recs; >> int size = 0; >> >> recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; >> @@ -119,12 +119,12 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee >> if (count < 1) >> return 0; >> >> - for (i = 0; i < count; size += recs[i++]->size) >> + for (i = 0; i < count; size += recs[i++].size) >> if (dest) >> - memcpy(dest + size, recs[i]->ptr, recs[i]->size); >> + memcpy(dest + size, recs[i].ptr, recs[i].size); >> if (add_nl) { >> - i = recs[count - 1]->size; >> - if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { >> + i = recs[count - 1].size; >> + if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') { >> if (needs_cr) { >> if (dest) >> dest[size] = '\r'; >> @@ -160,22 +160,22 @@ static int is_eol_crlf(xdfile_t *file, int i) >> >> if (i < file->nrec - 1) >> /* All lines before the last *must* end in LF */ >> - return (size = file->recs[i]->size) > 1 && >> - file->recs[i]->ptr[size - 2] == '\r'; >> + return (size = file->recs[i].size) > 1 && >> + file->recs[i].ptr[size - 2] == '\r'; >> if (!file->nrec) >> /* Cannot determine eol style from empty file */ >> return -1; >> - if ((size = file->recs[i]->size) && >> - file->recs[i]->ptr[size - 1] == '\n') >> + if ((size = file->recs[i].size) && >> + file->recs[i].ptr[size - 1] == '\n') >> /* Last line; ends in LF; Is it CR/LF? */ >> return size > 1 && >> - file->recs[i]->ptr[size - 2] == '\r'; >> + file->recs[i].ptr[size - 2] == '\r'; >> if (!i) >> /* The only line has no eol */ >> return -1; >> /* Determine eol from second-to-last line */ >> - return (size = file->recs[i - 1]->size) > 1 && >> - file->recs[i - 1]->ptr[size - 2] == '\r'; >> + return (size = file->recs[i - 1].size) > 1 && >> + file->recs[i - 1].ptr[size - 2] == '\r'; >> } >> >> static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m) >> @@ -334,22 +334,22 @@ static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags) >> static void xdl_refine_zdiff3_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, >> xpparam_t const *xpp) >> { >> - xrecord_t **rec1 = xe1->xdf2.recs, **rec2 = xe2->xdf2.recs; >> + xrecord_t *rec1 = xe1->xdf2.recs, *rec2 = xe2->xdf2.recs; >> for (; m; m = m->next) { >> /* let's handle just the conflicts */ >> if (m->mode) >> continue; >> >> while(m->chg1 && m->chg2 && >> - recmatch(rec1[m->i1], rec2[m->i2], xpp->flags)) { >> + recmatch(&rec1[m->i1], &rec2[m->i2], xpp->flags)) { >> m->chg1--; >> m->chg2--; >> m->i1++; >> m->i2++; >> } >> while (m->chg1 && m->chg2 && >> - recmatch(rec1[m->i1 + m->chg1 - 1], >> - rec2[m->i2 + m->chg2 - 1], xpp->flags)) { >> + recmatch(&rec1[m->i1 + m->chg1 - 1], >> + &rec2[m->i2 + m->chg2 - 1], xpp->flags)) { >> m->chg1--; >> m->chg2--; >> } >> @@ -381,12 +381,12 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, >> * This probably does not work outside git, since >> * we have a very simple mmfile structure. >> */ >> - t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr; >> - t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr >> - + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr; >> - t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr; >> - t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr >> - + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr; >> + t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr; >> + t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr >> + + xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr; >> + t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr; >> + t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr >> + + xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr; >> if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) >> return -1; >> if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || >> @@ -440,8 +440,8 @@ static int line_contains_alnum(const char *ptr, long size) >> static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) >> { >> for (; chg; chg--, i++) >> - if (line_contains_alnum(xe->xdf2.recs[i]->ptr, >> - xe->xdf2.recs[i]->size)) >> + if (line_contains_alnum(xe->xdf2.recs[i].ptr, >> + xe->xdf2.recs[i].size)) >> return 1; >> return 0; >> } >> diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c >> index 77dc411d19..bf69a58527 100644 >> --- a/xdiff/xpatience.c >> +++ b/xdiff/xpatience.c >> @@ -88,9 +88,9 @@ static int is_anchor(xpparam_t const *xpp, const char *line) >> static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, >> int pass) >> { >> - xrecord_t **records = pass == 1 ? >> + xrecord_t *records = pass == 1 ? >> map->env->xdf1.recs : map->env->xdf2.recs; >> - xrecord_t *record = records[line - 1]; >> + xrecord_t *record = &records[line - 1]; >> /* >> * After xdl_prepare_env() (or more precisely, due to >> * xdl_classify_record()), the "ha" member of the records (AKA lines) >> @@ -121,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, >> return; >> map->entries[index].line1 = line; >> map->entries[index].hash = record->ha; >> - map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr); >> + map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); >> if (!map->first) >> map->first = map->entries + index; >> if (map->last) { >> @@ -246,8 +246,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) >> >> static int match(struct hashmap *map, int line1, int line2) >> { >> - xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; >> - xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; >> + xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; >> + xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; >> return record1->ha == record2->ha; >> } >> >> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c >> index 6f1d4b4725..92f9845003 100644 >> --- a/xdiff/xprepare.c >> +++ b/xdiff/xprepare.c >> @@ -131,7 +131,6 @@ static void xdl_free_ctx(xdfile_t *xdf) >> xdl_free(xdf->rindex); >> xdl_free(xdf->rchg - 1); >> xdl_free(xdf->recs); >> - xdl_cha_free(&xdf->rcha); >> } >> >> >> @@ -146,8 +145,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ >> xdf->rchg = NULL; >> xdf->recs = NULL; >> >> - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) >> - goto abort; >> if (!XDL_ALLOC_ARRAY(xdf->recs, narec)) >> goto abort; >> >> @@ -158,12 +155,10 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ >> hav = xdl_hash_record(&cur, top, xpp->flags); >> if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) >> goto abort; >> - if (!(crec = xdl_cha_alloc(&xdf->rcha))) >> - goto abort; >> + crec = &xdf->recs[xdf->nrec++]; >> crec->ptr = prev; >> crec->size = (long) (cur - prev); >> crec->ha = hav; >> - xdf->recs[xdf->nrec++] = crec; >> if (xdl_classify_record(pass, cf, crec) < 0) >> goto abort; >> } >> @@ -263,7 +258,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { >> */ >> static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { >> long i, nm, nreff, mlim; >> - xrecord_t **recs; >> + xrecord_t *recs; >> xdlclass_t *rcrec; >> char *dis, *dis1, *dis2; >> int need_min = !!(cf->flags & XDF_NEED_MINIMAL); >> @@ -276,7 +271,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd >> if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) >> mlim = XDL_MAX_EQLIMIT; >> for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { >> - rcrec = cf->rcrecs[(*recs)->ha]; >> + rcrec = cf->rcrecs[recs->ha]; >> nm = rcrec ? rcrec->len2 : 0; >> dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; >> } >> @@ -284,7 +279,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd >> if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) >> mlim = XDL_MAX_EQLIMIT; >> for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { >> - rcrec = cf->rcrecs[(*recs)->ha]; >> + rcrec = cf->rcrecs[recs->ha]; >> nm = rcrec ? rcrec->len1 : 0; >> dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; >> } >> @@ -320,13 +315,13 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd >> */ >> static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { >> long i, lim; >> - xrecord_t **recs1, **recs2; >> + xrecord_t *recs1, *recs2; >> >> recs1 = xdf1->recs; >> recs2 = xdf2->recs; >> for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; >> i++, recs1++, recs2++) >> - if ((*recs1)->ha != (*recs2)->ha) >> + if (recs1->ha != recs2->ha) >> break; >> >> xdf1->dstart = xdf2->dstart = i; >> @@ -334,7 +329,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { >> recs1 = xdf1->recs + xdf1->nrec - 1; >> recs2 = xdf2->recs + xdf2->nrec - 1; >> for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) >> - if ((*recs1)->ha != (*recs2)->ha) >> + if (recs1->ha != recs2->ha) >> break; >> >> xdf1->dend = xdf1->nrec - i - 1; >> diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h >> index 85848f1685..3d26cbf1ec 100644 >> --- a/xdiff/xtypes.h >> +++ b/xdiff/xtypes.h >> @@ -45,10 +45,9 @@ typedef struct s_xrecord { >> } xrecord_t; >> >> typedef struct s_xdfile { >> - chastore_t rcha; >> + xrecord_t *recs; >> long nrec; >> long dstart, dend; >> - xrecord_t **recs; >> char *rchg; >> long *rindex; >> long nreff; >> diff --git a/xdiff/xutils.c b/xdiff/xutils.c >> index 444a108f87..332982b509 100644 >> --- a/xdiff/xutils.c >> +++ b/xdiff/xutils.c >> @@ -416,12 +416,12 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, >> mmfile_t subfile1, subfile2; >> xdfenv_t env; >> >> - subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; >> - subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + >> - diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; >> - subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; >> - subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + >> - diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; >> + subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr; >> + subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr + >> + diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr; >> + subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr; >> + subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr + >> + diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr; >> if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) >> return -1; >> >> -- >> gitgitgadget > > You weren't kidding with the --color-words callout; there's an awful > lot of places where you only change one or two characters (e.g. '->' > becoming '.'); that's much easier to see when viewing the diff with > that flag. > > Anyway, looks good. >

On the Git mailing list, Junio C Hamano wrote (reply to this):

Elijah Newren <newren@gmail.com> writes: > On Sun, Sep 7, 2025 at 12:46 PM Ezekiel Newren via GitGitGadget > <gitgitgadget@gmail.com> wrote: >> >> From: Ezekiel Newren <ezekielnewren@gmail.com> > > My personal bias is that things like "view with --color-words" makes > more sense to include near the end of the commit message, just before > the sign-offs. Not sure if others agree on that. FWIW, I am with you. Certainly not on the title. It is even fine immediately below the three-dash line before the diffstat. Thanks. I am enjoying to follow along the series by reading your reviews.

On the Git mailing list, Ben Knoble wrote (reply to this):

> Le 9 sept. 2025 à 04:58, Elijah Newren <newren@gmail.com> a écrit : > > On Sun, Sep 7, 2025 at 12:46 PM Ezekiel Newren via GitGitGadget > <gitgitgadget@gmail.com> wrote: >> >> From: Ezekiel Newren <ezekielnewren@gmail.com> > > My personal bias is that things like "view with --color-words" makes > more sense to include near the end of the commit message, just before > the sign-offs. Not sure if others agree on that. I’ve been using a Best-viewed-with trailer; I saw Peff do something similar once, I think.

gitgitgadget-git · 2025-09-09T09:22:30Z

On the Git mailing list, Elijah Newren wrote (reply to this):

On Sun, Sep 7, 2025 at 12:45 PM Ezekiel Newren via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > Every field in this struct is an alias for a certain field in xdfile_t. > > diffdata_t.nrec -> xdfile_t.nreff > diffdata_t.ha -> xdfile_t.ha > diffdata_t.rindex -> xdfile_t.rindex > diffdata_t.rchg -> xdfile_t.recharge > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 32 ++++++++------------------------ > xdiff/xdiffi.h | 11 ++--------- > 2 files changed, 10 insertions(+), 33 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 5a96e36dfb..bbf0161f84 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -257,10 +257,10 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > * sub-boxes by calling the box splitting function. Note that the real job > * (marking changed lines) is done in the two boundary reaching checks. > */ > -int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > - diffdata_t *dd2, long off2, long lim2, > +int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > + xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { > - unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; > + unsigned long const *ha1 = xdf1->ha, *ha2 = xdf2->ha; > > /* > * Shrink the box by walking through each diagonal snake (SW and NE). > @@ -273,17 +273,11 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > * be obviously changed. > */ > if (off1 == lim1) { > - char *rchg2 = dd2->rchg; > - long *rindex2 = dd2->rindex; > - > for (; off2 < lim2; off2++) > - rchg2[rindex2[off2]] = 1; > + xdf2->rchg[xdf2->rindex[off2]] = 1; > } else if (off2 == lim2) { > - char *rchg1 = dd1->rchg; > - long *rindex1 = dd1->rindex; > - > for (; off1 < lim1; off1++) > - rchg1[rindex1[off1]] = 1; > + xdf1->rchg[xdf1->rindex[off1]] = 1; > } else { > xdpsplit_t spl; > spl.i1 = spl.i2 = 0; > @@ -300,9 +294,9 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > /* > * ... et Impera. > */ > - if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, > + if (xdl_recs_cmp(xdf1, off1, spl.i1, xdf2, off2, spl.i2, > kvdf, kvdb, spl.min_lo, xenv) < 0 || > - xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, > + xdl_recs_cmp(xdf1, spl.i1, lim1, xdf2, spl.i2, lim2, > kvdf, kvdb, spl.min_hi, xenv) < 0) { > > return -1; > @@ -318,7 +312,6 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > long ndiags; > long *kvd, *kvdf, *kvdb; > xdalgoenv_t xenv; > - diffdata_t dd1, dd2; > int res; > > if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) > @@ -357,16 +350,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > xenv.snake_cnt = XDL_SNAKE_CNT; > xenv.heur_min = XDL_HEUR_MIN_COST; > > - dd1.nrec = xe->xdf1.nreff; > - dd1.ha = xe->xdf1.ha; > - dd1.rchg = xe->xdf1.rchg; > - dd1.rindex = xe->xdf1.rindex; > - dd2.nrec = xe->xdf2.nreff; > - dd2.ha = xe->xdf2.ha; > - dd2.rchg = xe->xdf2.rchg; > - dd2.rindex = xe->xdf2.rindex; > - > - res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, > + res = xdl_recs_cmp(&xe->xdf1, 0, xe->xdf1.nreff, &xe->xdf2, 0, xe->xdf2.nreff, > kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, > &xenv); > xdl_free(kvd); > diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h > index 126c9d8ff4..49e52c67f9 100644 > --- a/xdiff/xdiffi.h > +++ b/xdiff/xdiffi.h > @@ -24,13 +24,6 @@ > #define XDIFFI_H > > > -typedef struct s_diffdata { > - long nrec; > - unsigned long const *ha; > - long *rindex; > - char *rchg; > -} diffdata_t; > - > typedef struct s_xdalgoenv { > long mxcost; > long snake_cnt; > @@ -46,8 +39,8 @@ typedef struct s_xdchange { > > > > -int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > - diffdata_t *dd2, long off2, long lim2, > +int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > + xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); > int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > xdfenv_t *xe); > -- > gitgitgadget Viewing this commit with --color-moved helps highlight in the code what you say in the commit message. Makes sense.

gitgitgadget-git · 2025-09-21T13:28:04Z

On the Git mailing list, Phillip Wood wrote (reply to this):

Hi Ezekiel On 19/09/2025 16:16, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > Every field in this struct is an alias for a certain field in xdfile_t. > > diffdata_t.nrec -> xdfile_t.nreff > diffdata_t.ha -> xdfile_t.ha > diffdata_t.rindex -> xdfile_t.rindex > diffdata_t.rchg -> xdfile_t.rchg That explains some of the changes here (so long as one assumes the aliasing is a bad thing) but it does not explain why it is a good idea to remove the local variables rchg[12] and rindex[12] and instead dereference xdf[12] inside the loops Thanks Phillip > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 32 ++++++++------------------------ > xdiff/xdiffi.h | 11 ++--------- > 2 files changed, 10 insertions(+), 33 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 5a96e36dfb..bbf0161f84 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -257,10 +257,10 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, > * sub-boxes by calling the box splitting function. Note that the real job > * (marking changed lines) is done in the two boundary reaching checks. > */ > -int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > - diffdata_t *dd2, long off2, long lim2, > +int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > + xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { > - unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; > + unsigned long const *ha1 = xdf1->ha, *ha2 = xdf2->ha; > > /* > * Shrink the box by walking through each diagonal snake (SW and NE). > @@ -273,17 +273,11 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > * be obviously changed. > */ > if (off1 == lim1) { > - char *rchg2 = dd2->rchg; > - long *rindex2 = dd2->rindex; > - > for (; off2 < lim2; off2++) > - rchg2[rindex2[off2]] = 1; > + xdf2->rchg[xdf2->rindex[off2]] = 1; > } else if (off2 == lim2) { > - char *rchg1 = dd1->rchg; > - long *rindex1 = dd1->rindex; > - > for (; off1 < lim1; off1++) > - rchg1[rindex1[off1]] = 1; > + xdf1->rchg[xdf1->rindex[off1]] = 1; > } else { > xdpsplit_t spl; > spl.i1 = spl.i2 = 0; > @@ -300,9 +294,9 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > /* > * ... et Impera. > */ > - if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, > + if (xdl_recs_cmp(xdf1, off1, spl.i1, xdf2, off2, spl.i2, > kvdf, kvdb, spl.min_lo, xenv) < 0 || > - xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, > + xdl_recs_cmp(xdf1, spl.i1, lim1, xdf2, spl.i2, lim2, > kvdf, kvdb, spl.min_hi, xenv) < 0) { > > return -1; > @@ -318,7 +312,6 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > long ndiags; > long *kvd, *kvdf, *kvdb; > xdalgoenv_t xenv; > - diffdata_t dd1, dd2; > int res; > > if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) > @@ -357,16 +350,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > xenv.snake_cnt = XDL_SNAKE_CNT; > xenv.heur_min = XDL_HEUR_MIN_COST; > > - dd1.nrec = xe->xdf1.nreff; > - dd1.ha = xe->xdf1.ha; > - dd1.rchg = xe->xdf1.rchg; > - dd1.rindex = xe->xdf1.rindex; > - dd2.nrec = xe->xdf2.nreff; > - dd2.ha = xe->xdf2.ha; > - dd2.rchg = xe->xdf2.rchg; > - dd2.rindex = xe->xdf2.rindex; > - > - res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, > + res = xdl_recs_cmp(&xe->xdf1, 0, xe->xdf1.nreff, &xe->xdf2, 0, xe->xdf2.nreff, > kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, > &xenv); > xdl_free(kvd); > diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h > index 126c9d8ff4..49e52c67f9 100644 > --- a/xdiff/xdiffi.h > +++ b/xdiff/xdiffi.h > @@ -24,13 +24,6 @@ > #define XDIFFI_H > > > -typedef struct s_diffdata { > - long nrec; > - unsigned long const *ha; > - long *rindex; > - char *rchg; > -} diffdata_t; > - > typedef struct s_xdalgoenv { > long mxcost; > long snake_cnt; > @@ -46,8 +39,8 @@ typedef struct s_xdchange { > > > > -int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, > - diffdata_t *dd2, long off2, long lim2, > +int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > + xdfile_t *xdf2, long off2, long lim2, > long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); > int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > xdfenv_t *xe);

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Sun, Sep 21, 2025 at 7:06 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > > Every field in this struct is an alias for a certain field in xdfile_t. > > > > diffdata_t.nrec -> xdfile_t.nreff > > diffdata_t.ha -> xdfile_t.ha > > diffdata_t.rindex -> xdfile_t.rindex > > diffdata_t.rchg -> xdfile_t.rchg > > That explains some of the changes here (so long as one assumes the > aliasing is a bad thing) but it does not explain why it is a good idea > to remove the local variables rchg[12] and rindex[12] and instead > dereference xdf[12] inside the loops I removed the struct and local variable aliases to make it easier for usage-tracking tools to follow where fields are actually used. Whether someone relies on grep, ctags/etags, or more modern tooling, aliases hide the true field accesses and make it harder to track or refactor those fields consistently. By using the fields directly, every reference is literal and unambiguous. Also the local variable names don't add any meaning and only barely shorten the lines where they are used.

gitgitgadget-git · 2025-09-24T10:28:12Z

On the Git mailing list, Phillip Wood wrote (reply to this):

On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > The only values possible for 'changed' is 1 and 0, which exactly maps > to a bool type. It might not look like this is the case because > matches1 and matches2 (which use to be dis1, and dis2) were also char > and were assigned numerical values within a few lines of 'changed' > (what used to be rchg). > > Using NONE, SOME, TOO_MANY for matches1[i]/matches2[j], and true/false > for changed[k] makes it clear to future readers that these are > logically separate concepts. Nicely explained - I think this change is a very good idea and separating it out like this makes it much clearer what's going on compared to V4. Thanks Phillip > Best-viewed-with: --color-words > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 12 ++++++------ > xdiff/xhistogram.c | 8 ++++---- > xdiff/xpatience.c | 8 ++++---- > xdiff/xprepare.c | 12 ++++++------ > xdiff/xtypes.h | 2 +- > 5 files changed, 21 insertions(+), 21 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 5535452061..b902be9d0e 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > */ > if (off1 == lim1) { > for (; off2 < lim2; off2++) > - xdf2->changed[xdf2->rindex[off2]] = 1; > + xdf2->changed[xdf2->rindex[off2]] = true; > } else if (off2 == lim2) { > for (; off1 < lim1; off1++) > - xdf1->changed[xdf1->rindex[off1]] = 1; > + xdf1->changed[xdf1->rindex[off1]] = true; > } else { > xdpsplit_t spl; > spl.i1 = spl.i2 = 0; > @@ -753,8 +753,8 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->end < xdf->nrec && > recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { > - xdf->changed[g->start++] = 0; > - xdf->changed[g->end++] = 1; > + xdf->changed[g->start++] = false; > + xdf->changed[g->end++] = true; > > while (xdf->changed[g->end]) > g->end++; > @@ -774,8 +774,8 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->start > 0 && > recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) { > - xdf->changed[--g->start] = 1; > - xdf->changed[--g->end] = 0; > + xdf->changed[--g->start] = true; > + xdf->changed[--g->end] = false; > > while (xdf->changed[g->start - 1]) > g->start--; > diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c > index 15ca15f6b0..6dc450b1fe 100644 > --- a/xdiff/xhistogram.c > +++ b/xdiff/xhistogram.c > @@ -318,11 +318,11 @@ redo: > > if (!count1) { > while(count2--) > - env->xdf2.changed[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = true; > return 0; > } else if (!count2) { > while(count1--) > - env->xdf1.changed[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = true; > return 0; > } > > @@ -335,9 +335,9 @@ redo: > else { > if (lcs.begin1 == 0 && lcs.begin2 == 0) { > while (count1--) > - env->xdf1.changed[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = true; > while (count2--) > - env->xdf2.changed[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = true; > result = 0; > } else { > result = histogram_diff(xpp, env, > diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c > index 14092ffb86..669b653580 100644 > --- a/xdiff/xpatience.c > +++ b/xdiff/xpatience.c > @@ -331,11 +331,11 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, > /* trivial case: one side is empty */ > if (!count1) { > while(count2--) > - env->xdf2.changed[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = true; > return 0; > } else if (!count2) { > while(count1--) > - env->xdf1.changed[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = true; > return 0; > } > > @@ -347,9 +347,9 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, > /* are there any matching lines at all? */ > if (!map.has_matches) { > while(count1--) > - env->xdf1.changed[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = true; > while(count2--) > - env->xdf2.changed[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = true; > xdl_free(map.entries); > return 0; > } > diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c > index e1d575f779..070d220f3b 100644 > --- a/xdiff/xprepare.c > +++ b/xdiff/xprepare.c > @@ -273,7 +273,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > > /* > * Create temporary arrays that will help us decide if > - * changed[i] should remain 0 or become 1. > + * changed[i] should remain false, or become true. > */ > if (!XDL_CALLOC_ARRAY(matches1, xdf1->nrec + 1)) { > status = -1; > @@ -305,16 +305,16 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > > /* > * Use temporary arrays to decide if changed[i] should remain > - * 0 or become 1. > + * false, or become true. > */ > for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; > i <= xdf1->dend; i++, recs++) { > if (matches1[i] == SOME || > (matches1[i] == TOO_MANY && !xdl_clean_mmatch(matches1, i, xdf1->dstart, xdf1->dend))) { > xdf1->rindex[nreff++] = i; > - /* changed[i] remains 0 */ > + /* changed[i] remains false */ > } else > - xdf1->changed[i] = 1; > + xdf1->changed[i] = true; > } > xdf1->nreff = nreff; > > @@ -323,9 +323,9 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > if (matches2[i] == SOME || > (matches2[i] == TOO_MANY && !xdl_clean_mmatch(matches2, i, xdf2->dstart, xdf2->dend))) { > xdf2->rindex[nreff++] = i; > - /* changed[i] remains 0 */ > + /* changed[i] remains false */ > } else > - xdf2->changed[i] = 1; > + xdf2->changed[i] = true; > } > xdf2->nreff = nreff; > > diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h > index c4b5d2d8fa..f145abba3e 100644 > --- a/xdiff/xtypes.h > +++ b/xdiff/xtypes.h > @@ -48,7 +48,7 @@ typedef struct s_xdfile { > xrecord_t *recs; > long nrec; > long dstart, dend; > - char *changed; > + bool *changed; > long *rindex; > long nreff; > } xdfile_t;

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > > > The only values possible for 'changed' is 1 and 0, which exactly maps > > to a bool type. It might not look like this is the case because > > matches1 and matches2 (which use to be dis1, and dis2) were also char > > and were assigned numerical values within a few lines of 'changed' > > (what used to be rchg). > > > > Using NONE, SOME, TOO_MANY for matches1[i]/matches2[j], and true/false > > for changed[k] makes it clear to future readers that these are > > logically separate concepts. > > Nicely explained - I think this change is a very good idea and > separating it out like this makes it much clearer what's going on > compared to V4. Thank you! It was obvious to me because I've been refactoring xdiff for many months now, but I wasn't doing a good job of expressing why these 2 concepts are closely related, but distinct.

gitgitgadget-git · 2025-09-24T10:28:42Z

On the Git mailing list, Phillip Wood wrote (reply to this):

On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> I agree "changed" is a better name but the commit message should explain what "rchg" is used for so that someone who is not familiar with the code can understand why the change in name is desirable. Thanks Phillip > Best-viewed-with: --color-words > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 28 ++++++++++++++-------------- > xdiff/xhistogram.c | 8 ++++---- > xdiff/xpatience.c | 8 ++++---- > xdiff/xprepare.c | 12 ++++++------ > xdiff/xtypes.h | 2 +- > xdiff/xutils.c | 4 ++-- > 6 files changed, 31 insertions(+), 31 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 83c4cff6f7..5535452061 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, > */ > if (off1 == lim1) { > for (; off2 < lim2; off2++) > - xdf2->rchg[xdf2->rindex[off2]] = 1; > + xdf2->changed[xdf2->rindex[off2]] = 1; > } else if (off2 == lim2) { > for (; off1 < lim1; off1++) > - xdf1->rchg[xdf1->rindex[off1]] = 1; > + xdf1->changed[xdf1->rindex[off1]] = 1; > } else { > xdpsplit_t spl; > spl.i1 = spl.i2 = 0; > @@ -708,7 +708,7 @@ struct xdlgroup { > static void group_init(xdfile_t *xdf, struct xdlgroup *g) > { > g->start = g->end = 0; > - while (xdf->rchg[g->end]) > + while (xdf->changed[g->end]) > g->end++; > } > > @@ -722,7 +722,7 @@ static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) > return -1; > > g->start = g->end + 1; > - for (g->end = g->start; xdf->rchg[g->end]; g->end++) > + for (g->end = g->start; xdf->changed[g->end]; g->end++) > ; > > return 0; > @@ -738,7 +738,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) > return -1; > > g->end = g->start - 1; > - for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--) > + for (g->start = g->end; xdf->changed[g->start - 1]; g->start--) > ; > > return 0; > @@ -753,10 +753,10 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->end < xdf->nrec && > recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { > - xdf->rchg[g->start++] = 0; > - xdf->rchg[g->end++] = 1; > + xdf->changed[g->start++] = 0; > + xdf->changed[g->end++] = 1; > > - while (xdf->rchg[g->end]) > + while (xdf->changed[g->end]) > g->end++; > > return 0; > @@ -774,10 +774,10 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) > { > if (g->start > 0 && > recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) { > - xdf->rchg[--g->start] = 1; > - xdf->rchg[--g->end] = 0; > + xdf->changed[--g->start] = 1; > + xdf->changed[--g->end] = 0; > > - while (xdf->rchg[g->start - 1]) > + while (xdf->changed[g->start - 1]) > g->start--; > > return 0; > @@ -938,9 +938,9 @@ int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { > * Trivial. Collects "groups" of changes and creates an edit script. > */ > for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) > - if (xe->xdf1.rchg[i1 - 1] || xe->xdf2.rchg[i2 - 1]) { > - for (l1 = i1; xe->xdf1.rchg[i1 - 1]; i1--); > - for (l2 = i2; xe->xdf2.rchg[i2 - 1]; i2--); > + if (xe->xdf1.changed[i1 - 1] || xe->xdf2.changed[i2 - 1]) { > + for (l1 = i1; xe->xdf1.changed[i1 - 1]; i1--); > + for (l2 = i2; xe->xdf2.changed[i2 - 1]; i2--); > > if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { > xdl_free_script(cscr); > diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c > index 4d857e8ae2..15ca15f6b0 100644 > --- a/xdiff/xhistogram.c > +++ b/xdiff/xhistogram.c > @@ -318,11 +318,11 @@ redo: > > if (!count1) { > while(count2--) > - env->xdf2.rchg[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = 1; > return 0; > } else if (!count2) { > while(count1--) > - env->xdf1.rchg[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = 1; > return 0; > } > > @@ -335,9 +335,9 @@ redo: > else { > if (lcs.begin1 == 0 && lcs.begin2 == 0) { > while (count1--) > - env->xdf1.rchg[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = 1; > while (count2--) > - env->xdf2.rchg[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = 1; > result = 0; > } else { > result = histogram_diff(xpp, env, > diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c > index bf69a58527..14092ffb86 100644 > --- a/xdiff/xpatience.c > +++ b/xdiff/xpatience.c > @@ -331,11 +331,11 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, > /* trivial case: one side is empty */ > if (!count1) { > while(count2--) > - env->xdf2.rchg[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = 1; > return 0; > } else if (!count2) { > while(count1--) > - env->xdf1.rchg[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = 1; > return 0; > } > > @@ -347,9 +347,9 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, > /* are there any matching lines at all? */ > if (!map.has_matches) { > while(count1--) > - env->xdf1.rchg[line1++ - 1] = 1; > + env->xdf1.changed[line1++ - 1] = 1; > while(count2--) > - env->xdf2.rchg[line2++ - 1] = 1; > + env->xdf2.changed[line2++ - 1] = 1; > xdl_free(map.entries); > return 0; > } > diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c > index 27c5a4d636..b9b19c36de 100644 > --- a/xdiff/xprepare.c > +++ b/xdiff/xprepare.c > @@ -126,7 +126,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t > static void xdl_free_ctx(xdfile_t *xdf) > { > xdl_free(xdf->rindex); > - xdl_free(xdf->rchg - 1); > + xdl_free(xdf->changed - 1); > xdl_free(xdf->recs); > } > > @@ -139,7 +139,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > xrecord_t *crec; > > xdf->rindex = NULL; > - xdf->rchg = NULL; > + xdf->changed = NULL; > xdf->recs = NULL; > > if (!XDL_ALLOC_ARRAY(xdf->recs, narec)) > @@ -161,7 +161,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > } > } > > - if (!XDL_CALLOC_ARRAY(xdf->rchg, xdf->nrec + 2)) > + if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2)) > goto abort; > > if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && > @@ -170,7 +170,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > goto abort; > } > > - xdf->rchg += 1; > + xdf->changed += 1; > xdf->nreff = 0; > xdf->dstart = 0; > xdf->dend = xdf->nrec - 1; > @@ -287,7 +287,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { > xdf1->rindex[nreff++] = i; > } else > - xdf1->rchg[i] = 1; > + xdf1->changed[i] = 1; > } > xdf1->nreff = nreff; > > @@ -297,7 +297,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { > xdf2->rindex[nreff++] = i; > } else > - xdf2->rchg[i] = 1; > + xdf2->changed[i] = 1; > } > xdf2->nreff = nreff; > > diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h > index 3d26cbf1ec..c4b5d2d8fa 100644 > --- a/xdiff/xtypes.h > +++ b/xdiff/xtypes.h > @@ -48,7 +48,7 @@ typedef struct s_xdfile { > xrecord_t *recs; > long nrec; > long dstart, dend; > - char *rchg; > + char *changed; > long *rindex; > long nreff; > } xdfile_t; > diff --git a/xdiff/xutils.c b/xdiff/xutils.c > index 332982b509..ed65c222e6 100644 > --- a/xdiff/xutils.c > +++ b/xdiff/xutils.c > @@ -425,8 +425,8 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, > if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) > return -1; > > - memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); > - memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); > + memcpy(diff_env->xdf1.changed + line1 - 1, env.xdf1.changed, count1); > + memcpy(diff_env->xdf2.changed + line2 - 1, env.xdf2.changed, count2); > > xdl_free_env(&env); >

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > I agree "changed" is a better name but the commit message should explain > what "rchg" is used for so that someone who is not familiar with the > code can understand why the change in name is desirable. The field rchg (now 'changed') declares if a line in a file is changed or not. A later commit will change it's type from 'char' to 'bool' to make its purpose even more clear. Something like that?

On the Git mailing list, Phillip Wood wrote (reply to this):

On 24/09/2025 16:10, Ezekiel Newren wrote: > On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: >> >> On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: >>> From: Ezekiel Newren <ezekielnewren@gmail.com> >> >> I agree "changed" is a better name but the commit message should explain >> what "rchg" is used for so that someone who is not familiar with the >> code can understand why the change in name is desirable. > > The field rchg (now 'changed') declares if a line in a file is changed > or not. A later commit will change it's type from 'char' to 'bool' > to make its purpose even more clear. > > Something like that? That's great Phillip

gitgitgadget-git · 2025-09-21T13:29:20Z

On the Git mailing list, Phillip Wood wrote (reply to this):

Hi Ezekiel What is the purpose of this change. On the face of it it makes the code more verbose and introduces an extra pointer dereference into the loop condition. The compiler may lift the deference out of the loop but it would be helpful to know why this change is useful. Thanks Phillip On 19/09/2025 16:16, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > Best-viewed-with: --color-words > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index a66125d44a..83c4cff6f7 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -932,16 +932,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { > > int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { > xdchange_t *cscr = NULL, *xch; > - char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; > long i1, i2, l1, l2; > > /* > * Trivial. Collects "groups" of changes and creates an edit script. > */ > for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) > - if (rchg1[i1 - 1] || rchg2[i2 - 1]) { > - for (l1 = i1; rchg1[i1 - 1]; i1--); > - for (l2 = i2; rchg2[i2 - 1]; i2--); > + if (xe->xdf1.rchg[i1 - 1] || xe->xdf2.rchg[i2 - 1]) { > + for (l1 = i1; xe->xdf1.rchg[i1 - 1]; i1--); > + for (l2 = i2; xe->xdf2.rchg[i2 - 1]; i2--); > > if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { > xdl_free_script(cscr);

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Sun, Sep 21, 2025 at 7:06 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > What is the purpose of this change. On the face of it it makes the code > more verbose and introduces an extra pointer dereference into the loop > condition. The compiler may lift the dereference out of the loop but it > would be helpful to know why this change is useful. Most of Git directly accesses rchg, so changing this to also directly access it makes the code more consistent. Also, usage tracking tools like ctags or a modern IDE won't show all uses. This makes it harder to refactor or audit. Dropping the aliases means every access is visible and discoverable through the actual field name.

gitgitgadget-git · 2025-09-24T10:28:57Z

On the Git mailing list, Phillip Wood wrote (reply to this):

On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> The commit message should explain why this change is being made Thanks Phillip > Best-viewed-with: --color-words > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xdiffi.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index a66125d44a..83c4cff6f7 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -932,16 +932,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { > > int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { > xdchange_t *cscr = NULL, *xch; > - char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; > long i1, i2, l1, l2; > > /* > * Trivial. Collects "groups" of changes and creates an edit script. > */ > for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) > - if (rchg1[i1 - 1] || rchg2[i2 - 1]) { > - for (l1 = i1; rchg1[i1 - 1]; i1--); > - for (l2 = i2; rchg2[i2 - 1]; i2--); > + if (xe->xdf1.rchg[i1 - 1] || xe->xdf2.rchg[i2 - 1]) { > + for (l1 = i1; xe->xdf1.rchg[i1 - 1]; i1--); > + for (l2 = i2; xe->xdf2.rchg[i2 - 1]; i2--); > > if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { > xdl_free_script(cscr);

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > The commit message should explain why this change is being made Reasons to delete local variable aliasing: * Usage tracking: Tools are better able to follow the usage. * Refactor churn: Later commits will refactor rchg. * No additional meaning: The local variables express the same meaning as the struct field itself. Would that suffice?

On the Git mailing list, Junio C Hamano wrote (reply to this):

Ezekiel Newren <ezekielnewren@gmail.com> writes: > On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: >> >> On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: >> > From: Ezekiel Newren <ezekielnewren@gmail.com> >> >> The commit message should explain why this change is being made > > Reasons to delete local variable aliasing: > * Usage tracking: Tools are better able to follow the usage. > * Refactor churn: Later commits will refactor rchg. > * No additional meaning: The local variables express the same meaning > as the struct field itself. > > Would that suffice? In general, I do not view the first one is a good excuse. When using a separate local variable enhannces readability of the code (which often is true, with a pointer that points deep into a nested structure member) to humans, we shouldn't blindly bend the code to cater to less intelligent tools; it needs balancing. The third one alone is not a good excuse for the same reason. It (and the first one) depends on how much benefit we are gaining from having a short-and-sweet local variables that may make the expressions and statements they are involved in easier to read. For this particular change, I would think it is on borderline, and subjective. I would be OK with the third point if you rephrase it to additionally say that the conditional and the inner loop is easy enough to follow without using the local aliases to make the code shorter (which of course is the commit author's opinion, but they deserve to have and express their opinion as part of the rationale for a change). Thanks. - char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; long i1, i2, l1, l2; /* * Trivial. Collects "groups" of changes and creates an edit script. */ for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) - if (rchg1[i1 - 1] || rchg2[i2 - 1]) { - for (l1 = i1; rchg1[i1 - 1]; i1--); - for (l2 = i2; rchg2[i2 - 1]; i2--); + if (xe->xdf1.rchg[i1 - 1] || xe->xdf2.rchg[i2 - 1]) { + for (l1 = i1; xe->xdf1.rchg[i1 - 1]; i1--); + for (l2 = i2; xe->xdf2.rchg[i2 - 1]; i2--);

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 9:34 AM Junio C Hamano <gitster@pobox.com> wrote: > > Ezekiel Newren <ezekielnewren@gmail.com> writes: > > > On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > >> > >> On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > >> > From: Ezekiel Newren <ezekielnewren@gmail.com> > >> > >> The commit message should explain why this change is being made > > > > Reasons to delete local variable aliasing: > > * Usage tracking: Tools are better able to follow the usage. > > * Refactor churn: Later commits will refactor rchg. > > * No additional meaning: The local variables express the same meaning > > as the struct field itself. > > > > Would that suffice? > > In general, I do not view the first one is a good excuse. > > When using a separate local variable enhances readability of the > code (which often is true, with a pointer that points deep into a > nested structure member) to humans, we shouldn't blindly bend the > code to cater to less intelligent tools; it needs balancing. > > The third one alone is not a good excuse for the same reason. It > (and the first one) depends on how much benefit we are gaining from > having a short-and-sweet local variables that may make the expressions > and statements they are involved in easier to read. > > For this particular change, I would think it is borderline, and > subjective. I would be OK with the third point if you rephrase it > to additionally say that the conditional and the inner loop is easy > enough to follow without using the local aliases to make the code > shorter (which of course is the commit author's opinion, but they > deserve to have and express their opinion as part of the rationale > for a change). Ok, I can see that. I have a question for everyone: Does preparing C code to be translated into Rust count as a valid reason for changing it? Provided that there is no violation of the Git style (or very small in some cases). If my intent was to keep this as C code forever I'd agree, but... My other reason is that it more closely follows Rust paradigms. Creating multiple pointers to the same memory in Rust subverts the borrow checker's ability to keep track of who owns the memory. Since C doesn't have a concept of borrowing, I decided to delete the aliasing here, and in many other places. I'm removing as much aliasing from the code as I can because it makes it easier to translate into idiomatic Rust later. Using ctags and modern IDE's to follow variable usage is convenient, but safe Rust refuses to compile in many cases where C aliasing is common. We could use unsafe Rust to make literal translations of C to Rust, but then we'd forfeit the reasons and benefits of why we want to add Rust to Git in the first place. Translating C to Rust has been difficult because many styles in Git's C flat out won't compile in Rust. Many places need a little tweaking, others need major overhauls. In all of my C cleanups I am keeping idiomatic Rust in mind.

On the Git mailing list, Junio C Hamano wrote (reply to this):

Ezekiel Newren <ezekielnewren@gmail.com> writes: > I have a question for everyone: Does preparing C code to be translated > into Rust count as a valid reason for changing it? Provided that there > is no violation of the Git style (or very small in some cases). > > If my intent was to keep this as C code forever I'd agree, but... You'd agree that "I am preparing this for eventual rewrite" would be a valid reason? Or you are agreeing with something else? > My > other reason is that it more closely follows Rust paradigms. Creating > multiple pointers to the same memory in Rust subverts the borrow > checker's ability to keep track of who owns the memory. Sure. But looking at the use of rchg[12] in xdl_build_script(), if they were "const char *", combined with the fact that they are local and their addresses are never taken (to be leaked to our callers), you wouldn't have much trouble with the current code, or would you still have issues?

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 3:31 PM Junio C Hamano <gitster@pobox.com> wrote: > > Ezekiel Newren <ezekielnewren@gmail.com> writes: > > > I have a question for everyone: Does preparing C code to be translated > > into Rust count as a valid reason for changing it? Provided that there > > is no violation of the Git style (or very small in some cases). > > > > If my intent was to keep this as C code forever I'd agree, but... > > You'd agree that "I am preparing this for eventual rewrite" would be > a valid reason? Or you are agreeing with something else? I'd agree that my reasons for making this change are insufficient. I think usage tracking tools _is_ a weak argument, but perhaps not quite as weak as what you're thinking. For example, when I renamed the rchg field to changed, it was as simple as right-clicking the field, choosing Rename, typing 'changed', and letting the IDE update every use. Patch 11/13, "xdiff: rename rchg -> changed in xdfile_t", was generated directly from that one action. That patch was clean because I had already gone through and removed all the aliases of that field. > > My > > other reason is that it more closely follows Rust paradigms. Creating > > multiple pointers to the same memory in Rust subverts the borrow > > checker's ability to keep track of who owns the memory. > > Sure. But looking at the use of rchg[12] in xdl_build_script(), if > they were "const char *", combined with the fact that they are local > and their addresses are never taken (to be leaked to our callers), > you wouldn't have much trouble with the current code, or would you > still have issues? For xdl_build_script() specifically it would work just fine keeping the local variable aliasing in. I think this is another case of personal preference vs established style. Which path would you prefer that I take? 1. Drop this commit and remember to refactor rchg1, rchg2 to changed1, and changed2. 2. Keep this commit with reasons like this: * Refactor churn: Later commits will refactor rchg. * No additional meaning: The local variables express the same meaning as the struct field itself. Also, the conditional and the inner loop is easy enough to follow without using the local aliases to make the code shorter. My preference is number 2.

On the Git mailing list, Junio C Hamano wrote (reply to this):

Ezekiel Newren <ezekielnewren@gmail.com> writes: > I'd agree that my reasons for making this change are insufficient. I > think usage tracking tools _is_ a weak argument, but perhaps not quite > as weak as what you're thinking. For example, when I renamed the rchg > field to changed, it was as simple as right-clicking the field, > choosing Rename, typing 'changed', and letting the IDE update every > use. Patch 11/13, "xdiff: rename rchg -> changed in xdfile_t", was > generated directly from that one action. That patch was clean because > I had already gone through and removed all the aliases of that field. If I am reading you correctly, you are describing IDE's syntax-aware editor's symbol renaming feature; I am not quite sure what it has to do with "usage tracking", which would be more of static analysis thing, no? Surely, IDE makes these symbol renaming easy and that would be one reason that makes "because we will change this part of the code in later commit" less relevant, isn't it? Whether the struct member rchg is accessed directly in the conditional and loop, or is used as the source of an assignment to a local variable, it needs to be renamed either way. And with tools, it is not as bad. >> Sure. But looking at the use of rchg[12] in xdl_build_script(), if >> they were "const char *", combined with the fact that they are local >> and their addresses are never taken (to be leaked to our callers), >> you wouldn't have much trouble with the current code, or would you >> still have issues? > > For xdl_build_script() specifically it would work just fine keeping > the local variable aliasing in. And transliterating that directly to Rust would not cause the borrow issue as you described? Then it would be great. One less thing to worry about when we need to look at C and then write an equivalent in Rust. > 2. Keep this commit with reasons like this: > * Refactor churn: Later commits will refactor rchg. > * No additional meaning: The local variables express the same > meaning as the struct field itself. Also, the conditional and the > inner loop is easy enough to follow without using the local aliases to > make the code shorter. If you have to go route #2, I can live with it, but "No additional meaning" is _not_ a valid reason to remove aliasing variables. By definition, a local variable that aliases something like a deeply nested structure member should *not* introduce any additional meaning (in other words, if the code modifies that local variable making it out of sync with the underlying structure, the variable is not without "addtional meaning" and is no longer an alias). The whole reason I asked you to justify removal of the local variable on the basis of lack of readability improvement ("the original is simple enough to read without shorter variables") was just that. "This variable is merely an alias to something else", aka "There is no meaning added by the presence of this variable", is *not* a valid reason to remove it by itself. So, with the same code but with a better justification like "the original uses a few local variables to shorten the code, but open coding the access to underlying members of nested structure without these local variables is not all that hard to read, so let's do so", would probably be an acceptable explanation with no need for other excuses, I would think. Thanks.

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Thu, Sep 25, 2025 at 1:09 AM Junio C Hamano <gitster@pobox.com> wrote: > So, with the same code but with a better justification like "the > original uses a few local variables to shorten the code, but open > coding the access to underlying members of nested structure without > these local variables is not all that hard to read, so let's do so", > would probably be an acceptable explanation with no need for other > excuses, I would think. I've changed my opinion. I'm going to drop this commit because I found myself using this design pattern in other functions, which would make me a hypocrite if this commit stays.

gitgitgadget-git · 2025-09-09T09:21:40Z

On the Git mailing list, Elijah Newren wrote (reply to this):

On Sun, Sep 7, 2025 at 12:45 PM Ezekiel Newren via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > This function aliases the fields of xrecord_t, which makes it harder > to track the usages of those fields. Delete it. > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xemit.c | 38 +++++++++++++------------------------- > 1 file changed, 13 insertions(+), 25 deletions(-) > > diff --git a/xdiff/xemit.c b/xdiff/xemit.c > index 1d40c9cb40..2161ac3cd0 100644 > --- a/xdiff/xemit.c > +++ b/xdiff/xemit.c > @@ -22,23 +22,13 @@ > > #include "xinclude.h" > > -static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { > Can we remove this line too, to simplify the diff? (i.e. make there only be one blank line between the include of xinclude.h and xdl_emit_record? > - *rec = xdf->recs[ri]->ptr; > - > - return xdf->recs[ri]->size; > -} > - > - > -static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { > - long size, psize = strlen(pre); > - char const *rec; > - > - size = xdl_get_rec(xdf, ri, &rec); > - if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { > +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) > +{ The change of the opening curly brace to be on a new line does match our general coding guidelines, but cleanups like this should be in a separate patch. > + xrecord_t *rec = xdf->recs[ri]; > > + if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) > return -1; > - } While in this case you were modifying the line in question and thus fixing the code to also not use curly braces around a single statement, which is more justified, it still makes the patch slightly harder for reviewers to read since you are doing multiple things (what you said in the commit message, plus cleaning up style "violations"). It'd be better to leave the existing style violations in place, or fix them in a separate patch. > > return 0; > } > @@ -120,11 +110,11 @@ static long def_ff(const char *rec, long len, char *buf, long sz) > static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, > char *buf, long sz) > { > - const char *rec; > - long len = xdl_get_rec(xdf, ri, &rec); > + xrecord_t *rec = xdf->recs[ri]; > + > if (!xecfg->find_func) > - return def_ff(rec, len, buf, sz); > - return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv); > + return def_ff(rec->ptr, rec->size, buf, sz); > + return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); > } > > static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) > @@ -160,14 +150,12 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, > > static int is_empty_rec(xdfile_t *xdf, long ri) > { > - const char *rec; > - long len = xdl_get_rec(xdf, ri, &rec); > + xrecord_t *rec = xdf->recs[ri]; > + long i = 0; > > - while (len > 0 && XDL_ISSPACE(*rec)) { > - rec++; > - len--; > - } > - return !len; > + for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); > + > + return i == rec->size; > } I agree that the code is easier to follow without the aliasing.

gitgitgadget-git · 2025-09-20T17:59:51Z

On the Git mailing list, Junio C Hamano wrote (reply to this):

"Ezekiel Newren via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > This function aliases the fields of xrecord_t, which makes it harder > to track the usages of those fields. Delete it. Can we phrase it in a way that is a bit more friendly to casual readers? It is hard to tell if the function is serving any useful purpose from the above. If it is doing something useful, but is hard to read the surrounding code, that wouldn't be a good reason to remove it. It seems that this patch does what people often call "inlining a function call". I am not sure your comment about aliasing or if that is why the code is harder than necessary to read. > -static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { > - long size, psize = strlen(pre); > - char const *rec; > - > - size = xdl_get_rec(xdf, ri, &rec); > - if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { So we used to get "rec" and "size" separately because xdl_get_rec() made xrecord_t inaccessible to its callers. To call a helper function that takes a <ptr, len> pair, xdl_get_rec() is the way to grab that <ptr, len> pair out of the record index. > +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) > +{ > + xrecord_t *rec = xdf->recs[ri]; > > + if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) { > return -1; > } But by directly peeking into the xdf->recs[] array, we do not have to. Each element of the array is the <ptr, len> pair we want. The updated code is certainly easier to read. That applies to all the other callers touched by this patch. Thanks.

gitgitgadget-git · 2025-09-21T13:27:40Z

On the Git mailing list, Phillip Wood wrote (reply to this):

Hi Ezekiel On 19/09/2025 16:16, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > This function aliases the fields of xrecord_t, which makes it harder > to track the usages of those fields. Delete it. Patch 6 goes the other way and introduces a getter function that hides the field accesses so I'm not sure why this one is so bad that it needs to be removed. Thanks Phillip > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xemit.c | 38 +++++++++++++------------------------- > 1 file changed, 13 insertions(+), 25 deletions(-) > > diff --git a/xdiff/xemit.c b/xdiff/xemit.c > index 1d40c9cb40..b3793e81e2 100644 > --- a/xdiff/xemit.c > +++ b/xdiff/xemit.c > @@ -22,21 +22,11 @@ > > #include "xinclude.h" > > -static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { > - > - *rec = xdf->recs[ri]->ptr; > - > - return xdf->recs[ri]->size; > -} > - > - > -static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { > - long size, psize = strlen(pre); > - char const *rec; > - > - size = xdl_get_rec(xdf, ri, &rec); > - if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { > +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) > +{ > + xrecord_t *rec = xdf->recs[ri]; > > + if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) { > return -1; > } > > @@ -120,11 +110,11 @@ static long def_ff(const char *rec, long len, char *buf, long sz) > static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, > char *buf, long sz) > { > - const char *rec; > - long len = xdl_get_rec(xdf, ri, &rec); > + xrecord_t *rec = xdf->recs[ri]; > + > if (!xecfg->find_func) > - return def_ff(rec, len, buf, sz); > - return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv); > + return def_ff(rec->ptr, rec->size, buf, sz); > + return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); > } > > static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) > @@ -160,14 +150,12 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, > > static int is_empty_rec(xdfile_t *xdf, long ri) > { > - const char *rec; > - long len = xdl_get_rec(xdf, ri, &rec); > + xrecord_t *rec = xdf->recs[ri]; > + long i = 0; > > - while (len > 0 && XDL_ISSPACE(*rec)) { > - rec++; > - len--; > - } > - return !len; > + for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); > + > + return i == rec->size; > } > > int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Sun, Sep 21, 2025 at 7:06 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > Patch 6 goes the other way and introduces a getter function that hides > the field accesses so I'm not sure why this one is so bad that it needs > to be removed. I've added a copy below of the two functions for easy reference. To quote myself[1]: ``` The fields rindex and ha of xdfile_t are specific to the classic diff (myers and minimal). I plan on creating a struct for classic diff, but there's a lot of cleanup that needs to be done before that can happen, and leaving ha in would make those cleanups harder to follow. ``` get_hash() is a scaffolding function that will reduce refactor churn. It changes a few times in this patch series alone, and will change a few more times before the code is cleaned up enough to delete it. By contrast, xdl_get_rec() merely performs an array index, which is so trivial that it doesn't justify having its own function. get_hash() reduces confusion because xdfile_t.ha is an array that is a sparse copy of xrecord_t.ha values from xdfile_t.recs. The field xrecord_t.ha is confusing on its own, as it is first used to store the hash of the line and later repurposed as a minimal perfect hash[2]. static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { *rec = xdf->recs[ri]->ptr; return xdf->recs[ri]->size; } static unsigned long get_hash(xdfile_t *xdf, long index) { return xdf->recs[xdf->rindex[index]]->ha; } [1] https://lore.kernel.org/git/0bacb1191dad2748d2afa79665f1293b0381bde1.1758294992.git.gitgitgadget@gmail.com/ [2] https://lore.kernel.org/git/af96763036e13480ed4e6dfedcade5b2c90e414c.1757274320.git.gitgitgadget@gmail.com/

gitgitgadget-git · 2025-09-30T13:48:54Z

On the Git mailing list, "Kristoffer Haugsbakk" wrote (reply to this):

On Tue, Sep 23, 2025, at 23:24, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > When xrecord_t was a linked list, and recs didn't exist, I assume this > function walked the list until it found the right record. Accessing > a contiguous array is so trival that this function is now superfluous. s/trival/trivial/ > Delete it. > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > ---

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Tue, Sep 30, 2025 at 7:31 AM Kristoffer Haugsbakk <kristofferhaugsbakk@fastmail.com> wrote: > > When xrecord_t was a linked list, and recs didn't exist, I assume this > > function walked the list until it found the right record. Accessing > > a contiguous array is so trival that this function is now superfluous. > > s/trival/trivial/ I think that other than this typo this patch series is ready to be merged in. I would prefer that Junio fix this typo, so I don't spam the mailing list with such a small change.

On the Git mailing list, Junio C Hamano wrote (reply to this):

Ezekiel Newren <ezekielnewren@gmail.com> writes: > On Tue, Sep 30, 2025 at 7:31 AM Kristoffer Haugsbakk > <kristofferhaugsbakk@fastmail.com> wrote: >> > When xrecord_t was a linked list, and recs didn't exist, I assume this >> > function walked the list until it found the right record. Accessing >> > a contiguous array is so trival that this function is now superfluous. >> >> s/trival/trivial/ > > I think that other than this typo this patch series is ready to be > merged in. I would prefer that Junio fix this typo, so I don't spam > the mailing list with such a small change. If it is only this instance, I can "rebase -i" it away, sure. Thanks.

gitgitgadget-git · 2025-09-24T10:29:11Z

On the Git mailing list, Phillip Wood wrote (reply to this):

On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > From: Ezekiel Newren <ezekielnewren@gmail.com> > > Use the type xrecord_t as the local variable for the functions in the > file xdiff/xemit.c. This explains what the change is but not why it is being made. Commit messages in this project are expected to explain the reason for the change so that future readers can understand why a change was made. Thanks Phillip > > Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com> > --- > xdiff/xemit.c | 29 +++++++++++++---------------- > 1 file changed, 13 insertions(+), 16 deletions(-) > > diff --git a/xdiff/xemit.c b/xdiff/xemit.c > index 40fc8154f3..2161ac3cd0 100644 > --- a/xdiff/xemit.c > +++ b/xdiff/xemit.c > @@ -23,12 +23,11 @@ > #include "xinclude.h" > > > -static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { > - long size, psize = strlen(pre); > - char const *rec = xdf->recs[ri]->ptr; > +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) > +{ > + xrecord_t *rec = xdf->recs[ri]; > > - size = xdf->recs[ri]->size; > - if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) > + if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) > return -1; > > return 0; > @@ -111,11 +110,11 @@ static long def_ff(const char *rec, long len, char *buf, long sz) > static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, > char *buf, long sz) > { > - const char *rec = xdf->recs[ri]->ptr; > - long len = xdf->recs[ri]->size; > + xrecord_t *rec = xdf->recs[ri]; > + > if (!xecfg->find_func) > - return def_ff(rec, len, buf, sz); > - return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv); > + return def_ff(rec->ptr, rec->size, buf, sz); > + return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); > } > > static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) > @@ -151,14 +150,12 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, > > static int is_empty_rec(xdfile_t *xdf, long ri) > { > - const char *rec = xdf->recs[ri]->ptr; > - long len = xdf->recs[ri]->size; > + xrecord_t *rec = xdf->recs[ri]; > + long i = 0; > > - while (len > 0 && XDL_ISSPACE(*rec)) { > - rec++; > - len--; > - } > - return !len; > + for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); > + > + return i == rec->size; > } > > int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,

On the Git mailing list, Ezekiel Newren wrote (reply to this):

On Wed, Sep 24, 2025 at 4:21 AM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 23/09/2025 22:24, Ezekiel Newren via GitGitGadget wrote: > > From: Ezekiel Newren <ezekielnewren@gmail.com> > > > > Use the type xrecord_t as the local variable for the functions in the > > file xdiff/xemit.c. > > This explains what the change is but not why it is being made. Commit > messages in this project are expected to explain the reason for the > change so that future readers can understand why a change was made. "Use the type xrecord_t as the local variable for the functions in the file xdiff/xemit.c. This helps tools like ctags or modern IDE's more accurately follow the usage of xrecord_t." Is this commit message good enough?

-Original file line number
+Diff line change
@@ Expand Up / @@ -22,6 +22,11 @@ @@
     #include "xinclude.h"
+    static unsigned long get_hash(xdfile_t *xdf, long index)
+    {
+    	return xdf->recs[xdf->rindex[index]].ha;
+    }
     #define XDL_MAX_COST_MIN 256
     #define XDL_HEUR_MIN_COST 256
     #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
@@ Expand All / @@ -42,8 +47,8 @@ typedef struct s_xdpsplit { @@
      * using this algorithm, so a little bit of heuristic is needed to cut the
      * search and to return a suboptimal point.
      */
-    static long xdl_split(unsigned long const *ha1, long off1, long lim1,
-    		      unsigned long const *ha2, long off2, long lim2,
+    static long xdl_split(xdfile_t *xdf1, long off1, long lim1,
+    		      xdfile_t *xdf2, long off2, long lim2,
     		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
     		      xdalgoenv_t *xenv) {
     	long dmin = off1 - lim2, dmax = lim1 - off2;
@@ Expand Down Expand Up @@
     				i1 = kvdf[d + 1];
     			prev1 = i1;
     			i2 = i1 - d;
-    			for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
+    			for (; i1 < lim1 && i2 < lim2 && get_hash(xdf1, i1) == get_hash(xdf2, i2); i1++, i2++);
     			if (i1 - prev1 > xenv->snake_cnt)
     				got_snake = 1;
     			kvdf[d] = i1;
@@ Expand Down Expand Up @@
     				i1 = kvdb[d + 1] - 1;
     			prev1 = i1;
     			i2 = i1 - d;
-    			for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
+    			for (; i1 > off1 && i2 > off2 && get_hash(xdf1, i1 - 1) == get_hash(xdf2, i2 - 1); i1--, i2--);
     			if (prev1 - i1 > xenv->snake_cnt)
     				got_snake = 1;
     			kvdb[d] = i1;
@@ Expand Down Expand Up @@
     				if (v > XDL_K_HEUR * ec && v > best &&
     				    off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
     				    off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
-    					for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
+    					for (k = 1; get_hash(xdf1, i1 - k) == get_hash(xdf2, i2 - k); k++)
     						if (k == xenv->snake_cnt) {
     							best = v;
     							spl->i1 = i1;
@@ Expand All @@
     				if (v > XDL_K_HEUR * ec && v > best &&
     				    off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
     				    off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
-    					for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
+    					for (k = 0; get_hash(xdf1, i1 + k) == get_hash(xdf2, i2 + k); k++)
     						if (k == xenv->snake_cnt - 1) {
     							best = v;
     							spl->i1 = i1;
@@ Expand Down Expand Up @@
      * sub-boxes by calling the box splitting function. Note that the real job
      * (marking changed lines) is done in the two boundary reaching checks.
      */
-    int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
-    		 diffdata_t *dd2, long off2, long lim2,
+    int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1,
+    		 xdfile_t *xdf2, long off2, long lim2,
     		 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
-    	unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
     	/*
     	 * Shrink the box by walking through each diagonal snake (SW and NE).
     	 */
-    	for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
-    	for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
+    	for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, off1) == get_hash(xdf2, off2); off1++, off2++);
+    	for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, lim1 - 1) == get_hash(xdf2, lim2 - 1); lim1--, lim2--);
     	/*
     	 * If one dimension is empty, then all records on the other one must
     	 * be obviously changed.
     	 */
     	if (off1 == lim1) {
-    		char *rchg2 = dd2->rchg;
-    		long *rindex2 = dd2->rindex;
     		for (; off2 < lim2; off2++)
-    			rchg2[rindex2[off2]] = 1;
+    			xdf2->changed[xdf2->rindex[off2]] = true;
     	} else if (off2 == lim2) {
-    		char *rchg1 = dd1->rchg;
-    		long *rindex1 = dd1->rindex;
     		for (; off1 < lim1; off1++)
-    			rchg1[rindex1[off1]] = 1;
+    			xdf1->changed[xdf1->rindex[off1]] = true;
     	} else {
     		xdpsplit_t spl;
     		spl.i1 = spl.i2 = 0;
     		/*
     		 * Divide ...
     		 */
-    		if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
+    		if (xdl_split(xdf1, off1, lim1, xdf2, off2, lim2, kvdf, kvdb,
     			      need_min, &spl, xenv) < 0) {
     			return -1;
@@ Expand All / @@ -300,9 +298,9 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, @@
     		/*
     		 * ... et Impera.
     		 */
-    		if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
+    		if (xdl_recs_cmp(xdf1, off1, spl.i1, xdf2, off2, spl.i2,
     				 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
-    		    xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
+    		    xdl_recs_cmp(xdf1, spl.i1, lim1, xdf2, spl.i2, lim2,
     				 kvdf, kvdb, spl.min_hi, xenv) < 0) {
     			return -1;
@@ Expand All @@
     	long ndiags;
     	long *kvd, *kvdf, *kvdb;
     	xdalgoenv_t xenv;
-    	diffdata_t dd1, dd2;
     	int res;
     	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0)
@@ Expand Down Expand Up @@
     	xenv.snake_cnt = XDL_SNAKE_CNT;
     	xenv.heur_min = XDL_HEUR_MIN_COST;
-    	dd1.nrec = xe->xdf1.nreff;
-    	dd1.ha = xe->xdf1.ha;
-    	dd1.rchg = xe->xdf1.rchg;
-    	dd1.rindex = xe->xdf1.rindex;
-    	dd2.nrec = xe->xdf2.nreff;
-    	dd2.ha = xe->xdf2.ha;
-    	dd2.rchg = xe->xdf2.rchg;
-    	dd2.rindex = xe->xdf2.rindex;
-    	res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
+    	res = xdl_recs_cmp(&xe->xdf1, 0, xe->xdf1.nreff, &xe->xdf2, 0, xe->xdf2.nreff,
     			   kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0,
     			   &xenv);
     	xdl_free(kvd);
@@ Expand Down Expand Up @@
     		m->indent = -1;
     	} else {
     		m->end_of_file = 0;
-    		m->indent = get_indent(xdf->recs[split]);
+    		m->indent = get_indent(&xdf->recs[split]);
     	}
     	m->pre_blank = 0;
     	m->pre_indent = -1;
     	for (i = split - 1; i >= 0; i--) {
-    		m->pre_indent = get_indent(xdf->recs[i]);
+    		m->pre_indent = get_indent(&xdf->recs[i]);
     		if (m->pre_indent != -1)
     			break;
     		m->pre_blank += 1;
@@ Expand All / @@ -520,7 +508,7 @@ static void measure_split(const xdfile_t *xdf, long split, @@
     	m->post_blank = 0;
     	m->post_indent = -1;
     	for (i = split + 1; i < xdf->nrec; i++) {
-    		m->post_indent = get_indent(xdf->recs[i]);
+    		m->post_indent = get_indent(&xdf->recs[i]);
     		if (m->post_indent != -1)
     			break;
     		m->post_blank += 1;
@@ Expand Down Expand Up / @@ -720,7 +708,7 @@ struct xdlgroup { @@
     static void group_init(xdfile_t *xdf, struct xdlgroup *g)
     {
     	g->start = g->end = 0;
-    	while (xdf->rchg[g->end])
+    	while (xdf->changed[g->end])
     		g->end++;
     }
@@ Expand All @@
     		return -1;
     	g->start = g->end + 1;
-    	for (g->end = g->start; xdf->rchg[g->end]; g->end++)
+    	for (g->end = g->start; xdf->changed[g->end]; g->end++)
     		;
     	return 0;
@@ Expand All @@
     		return -1;
     	g->end = g->start - 1;
-    	for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
+    	for (g->start = g->end; xdf->changed[g->start - 1]; g->start--)
     		;
     	return 0;
@@ Expand All @@
     static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
     {
     	if (g->end < xdf->nrec &&
-    	    recs_match(xdf->recs[g->start], xdf->recs[g->end])) {
-    		xdf->rchg[g->start++] = 0;
-    		xdf->rchg[g->end++] = 1;
+    	    recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) {
+    		xdf->changed[g->start++] = false;
+    		xdf->changed[g->end++] = true;
-    		while (xdf->rchg[g->end])
+    		while (xdf->changed[g->end])
     			g->end++;
     		return 0;
@@ Expand All @@
     static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
     {
     	if (g->start > 0 &&
-    	    recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) {
-    		xdf->rchg[--g->start] = 1;
-    		xdf->rchg[--g->end] = 0;
+    	    recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) {
+    		xdf->changed[--g->start] = true;
+    		xdf->changed[--g->end] = false;
-    		while (xdf->rchg[g->start - 1])
+    		while (xdf->changed[g->start - 1])
     			g->start--;
     		return 0;
@@ Expand Down Expand Up @@
     int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
     	xdchange_t *cscr = NULL, *xch;
-    	char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
+    	bool *changed1 = xe->xdf1.changed, *changed2 = xe->xdf2.changed;
     	long i1, i2, l1, l2;
     	/*
     	 * Trivial. Collects "groups" of changes and creates an edit script.
     	 */
     	for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
-    		if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
-    			for (l1 = i1; rchg1[i1 - 1]; i1--);
-    			for (l2 = i2; rchg2[i2 - 1]; i2--);
+    		if (changed1[i1 - 1] || changed2[i2 - 1]) {
+    			for (l1 = i1; changed1[i1 - 1]; i1--);
+    			for (l2 = i2; changed2[i2 - 1]; i2--);
     			if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
     				xdl_free_script(cscr);
@@ Expand Down Expand Up @@
     	for (xch = xscr; xch; xch = xch->next) {
     		int ignore = 1;
-    		xrecord_t **rec;
+    		xrecord_t *rec;
     		long i;
     		rec = &xe->xdf1.recs[xch->i1];
     		for (i = 0; i < xch->chg1 && ignore; i++)
-    			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+    			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);
     		rec = &xe->xdf2.recs[xch->i2];
     		for (i = 0; i < xch->chg2 && ignore; i++)
-    			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+    			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);
     		xch->ignore = ignore;
     	}
@@ Expand All @@
     	xdchange_t *xch;
     	for (xch = xscr; xch; xch = xch->next) {
-    		xrecord_t **rec;
+    		xrecord_t *rec;
     		int ignore = 1;
     		long i;
@@ Expand All @@
     		rec = &xe->xdf1.recs[xch->i1];
     		for (i = 0; i < xch->chg1 && ignore; i++)
-    			ignore = record_matches_regex(rec[i], xpp);
+    			ignore = record_matches_regex(&rec[i], xpp);
     		rec = &xe->xdf2.recs[xch->i2];
     		for (i = 0; i < xch->chg2 && ignore; i++)
-    			ignore = record_matches_regex(rec[i], xpp);
+    			ignore = record_matches_regex(&rec[i], xpp);
     		xch->ignore = ignore;
     	}
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -24,13 +24,6 @@ @@
     #define XDIFFI_H
-    typedef struct s_diffdata {
-    	long nrec;
-    	unsigned long const *ha;
-    	long *rindex;
-    	char *rchg;
-    } diffdata_t;
     typedef struct s_xdalgoenv {
     	long mxcost;
     	long snake_cnt;
@@ Expand All / @@ -46,8 +39,8 @@ typedef struct s_xdchange { @@
-    int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
-    		 diffdata_t *dd2, long off2, long lim2,
+    int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1,
+    		 xdfile_t *xdf2, long off2, long lim2,
     		 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
     int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
     		xdfenv_t *xe);
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -22,23 +22,13 @@ @@
     #include "xinclude.h"
-    static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {
-    	*rec = xdf->recs[ri]->ptr;
-    	return xdf->recs[ri]->size;
-    }
-    static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) {
-    	long size, psize = strlen(pre);
-    	char const *rec;
-    	size = xdl_get_rec(xdf, ri, &rec);
-    	if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) {
+    static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb)
+    {
+    	xrecord_t *rec = &xdf->recs[ri];
+    	if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0)
     		return -1;
-    	}
     	return 0;
     }
@@ Expand Down Expand Up @@
     static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
     			   char *buf, long sz)
     {
-    	const char *rec;
-    	long len = xdl_get_rec(xdf, ri, &rec);
+    	xrecord_t *rec = &xdf->recs[ri];
     	if (!xecfg->find_func)
-    		return def_ff(rec, len, buf, sz);
-    	return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv);
+    		return def_ff(rec->ptr, rec->size, buf, sz);
+    	return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv);
     }
     static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
@@ Expand Down Expand Up @@
     static int is_empty_rec(xdfile_t *xdf, long ri)
     {
-    	const char *rec;
-    	long len = xdl_get_rec(xdf, ri, &rec);
+    	xrecord_t *rec = &xdf->recs[ri];
+    	long i = 0;
-    	while (len > 0 && XDL_ISSPACE(*rec)) {
-    		rec++;
-    		len--;
-    	}
-    	return !len;
+    	for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++);
+    	return i == rec->size;
     }
     int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
@@ Expand Down @@

Cleanup xdfile_t and xrecord_t in xdiff. #2048

Uh oh!

Cleanup xdfile_t and xrecord_t in xdiff. #2048

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 10, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 21, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 21, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 21, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 21, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 24, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 25, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 25, 2025

Choose a reason for hiding this comment

Uh oh!

gitgitgadget-git bot Sep 9, 2025

Choose a reason for hiding this comment

Uh oh!