From 4ce5cf77005eb8c6da243777b3c29103add7ddbd Mon Sep 17 00:00:00 2001 From: Linus Arver Date: Thu, 14 Sep 2023 00:53:14 -0700 Subject: [PATCH 1/3] commit: ignore_non_trailer computes number of bytes to ignore ignore_non_trailer() returns the _number of bytes_ that should be ignored from the end of the log message. It does not by itself "ignore" anything. Rename this function to remove the leading "ignore" verb, to sound more like a quantity than an action. Signed-off-by: Linus Arver --- builtin/commit.c | 2 +- builtin/merge.c | 2 +- commit.c | 2 +- commit.h | 4 ++-- trailer.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 7da5f924484d58..d1785d32db1bc1 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -900,7 +900,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix, strbuf_stripspace(&sb, '\0'); if (signoff) - append_signoff(&sb, ignore_non_trailer(sb.buf, sb.len), 0); + append_signoff(&sb, ignored_log_message_bytes(sb.buf, sb.len), 0); if (fwrite(sb.buf, 1, sb.len, s->fp) < sb.len) die_errno(_("could not write commit template")); diff --git a/builtin/merge.c b/builtin/merge.c index 545da0c8a1106f..c654a29fe85110 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -870,7 +870,7 @@ static void prepare_to_commit(struct commit_list *remoteheads) _(no_scissors_editor_comment), comment_line_char); } if (signoff) - append_signoff(&msg, ignore_non_trailer(msg.buf, msg.len), 0); + append_signoff(&msg, ignored_log_message_bytes(msg.buf, msg.len), 0); write_merge_heads(remoteheads); write_file_buf(git_path_merge_msg(the_repository), msg.buf, msg.len); if (run_commit_hook(0 < option_edit, get_index_file(), NULL, diff --git a/commit.c b/commit.c index b3223478bc2a3a..4440fbabb83886 100644 --- a/commit.c +++ b/commit.c @@ -1769,7 +1769,7 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len * Returns the number of bytes from the tail to ignore, to be fed as * the second parameter to append_signoff(). */ -size_t ignore_non_trailer(const char *buf, size_t len) +size_t ignored_log_message_bytes(const char *buf, size_t len) { size_t boc = 0; size_t bol = 0; diff --git a/commit.h b/commit.h index 28928833c54408..1cc872f225f438 100644 --- a/commit.h +++ b/commit.h @@ -294,8 +294,8 @@ const char *find_header_mem(const char *msg, size_t len, const char *find_commit_header(const char *msg, const char *key, size_t *out_len); -/* Find the end of the log message, the right place for a new trailer. */ -size_t ignore_non_trailer(const char *buf, size_t len); +/* Find the number of bytes to ignore from the end of a log message. */ +size_t ignored_log_message_bytes(const char *buf, size_t len); typedef int (*each_mergetag_fn)(struct commit *commit, struct commit_extra_header *extra, void *cb_data); diff --git a/trailer.c b/trailer.c index b6de5d9cb2dab2..3c54b38a85a798 100644 --- a/trailer.c +++ b/trailer.c @@ -928,7 +928,7 @@ static size_t find_trailer_start(const char *buf, size_t len) /* Return the position of the end of the trailers. */ static size_t find_trailer_end(const char *buf, size_t len) { - return len - ignore_non_trailer(buf, len); + return len - ignored_log_message_bytes(buf, len); } static int ends_with_blank_line(const char *buf, size_t len) From ce25420db29c9953095db652584dbed4e35d67ad Mon Sep 17 00:00:00 2001 From: Linus Arver Date: Mon, 31 Jul 2023 22:44:35 -0700 Subject: [PATCH 2/3] trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan Helped-by: Junio C Hamano Signed-off-by: Linus Arver --- trailer.c | 64 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/trailer.c b/trailer.c index 3c54b38a85a798..70c81fda710928 100644 --- a/trailer.c +++ b/trailer.c @@ -809,21 +809,50 @@ static ssize_t last_line(const char *buf, size_t len) } /* - * Return the position of the start of the patch or the length of str if there - * is no patch in the message. + * Find the end of the log message as an offset from the start of the input + * (where callers of this function are interested in looking for a trailers + * block in the same input). We have to consider two categories of content that + * can come at the end of the input which we want to ignore (because they don't + * belong in the log message): + * + * (1) the "patch part" which begins with a "---" divider and has patch + * information (like the output of git-format-patch), and + * + * (2) any trailing comment lines, blank lines like in the output of "git + * commit -v", or stuff below the "cut" (scissor) line. + * + * As a formula, the situation looks like this: + * + * INPUT = LOG MESSAGE + IGNORED + * + * where IGNORED can be either of the two categories described above. It may be + * that there is nothing to ignore. Now it may be the case that the LOG MESSAGE + * contains a trailer block, but that's not the concern of this function. */ -static size_t find_patch_start(const char *str) +static size_t find_end_of_log_message(const char *input, int no_divider) { + size_t end; const char *s; - for (s = str; *s; s = next_line(s)) { + /* Assume the naive end of the input is already what we want. */ + end = strlen(input); + + if (no_divider) { + return end; + } + + /* Optionally skip over any patch part ("---" line and below). */ + for (s = input; *s; s = next_line(s)) { const char *v; - if (skip_prefix(s, "---", &v) && isspace(*v)) - return s - str; + if (skip_prefix(s, "---", &v) && isspace(*v)) { + end = s - input; + break; + } } - return s - str; + /* Skip over other ignorable bits. */ + return end - ignored_log_message_bytes(input, end); } /* @@ -925,12 +954,6 @@ static size_t find_trailer_start(const char *buf, size_t len) return len; } -/* Return the position of the end of the trailers. */ -static size_t find_trailer_end(const char *buf, size_t len) -{ - return len - ignored_log_message_bytes(buf, len); -} - static int ends_with_blank_line(const char *buf, size_t len) { ssize_t ll = last_line(buf, len); @@ -1101,7 +1124,7 @@ void process_trailers(const char *file, void trailer_info_get(struct trailer_info *info, const char *str, const struct process_trailer_options *opts) { - int patch_start, trailer_end, trailer_start; + int end_of_log_message, trailer_start; struct strbuf **trailer_lines, **ptr; char **trailer_strings = NULL; size_t nr = 0, alloc = 0; @@ -1109,16 +1132,11 @@ void trailer_info_get(struct trailer_info *info, const char *str, ensure_configured(); - if (opts->no_divider) - patch_start = strlen(str); - else - patch_start = find_patch_start(str); - - trailer_end = find_trailer_end(str, patch_start); - trailer_start = find_trailer_start(str, trailer_end); + end_of_log_message = find_end_of_log_message(str, opts->no_divider); + trailer_start = find_trailer_start(str, end_of_log_message); trailer_lines = strbuf_split_buf(str + trailer_start, - trailer_end - trailer_start, + end_of_log_message - trailer_start, '\n', 0); for (ptr = trailer_lines; *ptr; ptr++) { @@ -1141,7 +1159,7 @@ void trailer_info_get(struct trailer_info *info, const char *str, info->blank_line_before_trailer = ends_with_blank_line(str, trailer_start); info->trailer_start = str + trailer_start; - info->trailer_end = str + trailer_end; + info->trailer_end = str + end_of_log_message; info->trailers = trailer_strings; info->trailer_nr = nr; } From e3a7b150241c2d997026b8ccf7c88ecfecdfe4e7 Mon Sep 17 00:00:00 2001 From: Linus Arver Date: Fri, 8 Sep 2023 17:00:13 -0700 Subject: [PATCH 3/3] trailer: use offsets for trailer_start/trailer_end Previously these fields in the trailer_info struct were of type "const char *" and pointed to positions in the input string directly (to the start and end positions of the trailer block). Use offsets to make the intended usage less ambiguous. We only need to reference the input string in format_trailer_info(), so update that function to take a pointer to the input. While we're at it, rename trailer_start to trailer_block_start to be more explicit about these offsets (that they are for the entire trailer block including other trailers). Ditto for trailer_end. Reported-by: Glen Choo Signed-off-by: Linus Arver --- sequencer.c | 2 +- trailer.c | 29 ++++++++++++++--------------- trailer.h | 10 +++++----- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sequencer.c b/sequencer.c index d584cac8ed9307..8707a92204f1d6 100644 --- a/sequencer.c +++ b/sequencer.c @@ -345,7 +345,7 @@ static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob, if (ignore_footer) sb->buf[sb->len - ignore_footer] = saved_char; - if (info.trailer_start == info.trailer_end) + if (info.trailer_block_start == info.trailer_block_end) return 0; for (i = 0; i < info.trailer_nr; i++) diff --git a/trailer.c b/trailer.c index 70c81fda710928..f7dc7c4c008ed9 100644 --- a/trailer.c +++ b/trailer.c @@ -859,7 +859,7 @@ static size_t find_end_of_log_message(const char *input, int no_divider) * Return the position of the first trailer line or len if there are no * trailers. */ -static size_t find_trailer_start(const char *buf, size_t len) +static size_t find_trailer_block_start(const char *buf, size_t len) { const char *s; ssize_t end_of_title, l; @@ -1075,7 +1075,6 @@ void process_trailers(const char *file, LIST_HEAD(head); struct strbuf sb = STRBUF_INIT; struct trailer_info info; - size_t trailer_end; FILE *outfile = stdout; ensure_configured(); @@ -1086,11 +1085,10 @@ void process_trailers(const char *file, outfile = create_in_place_tempfile(file); parse_trailers(&info, sb.buf, &head, opts); - trailer_end = info.trailer_end - sb.buf; /* Print the lines before the trailers */ if (!opts->only_trailers) - fwrite(sb.buf, 1, info.trailer_start - sb.buf, outfile); + fwrite(sb.buf, 1, info.trailer_block_start, outfile); if (!opts->only_trailers && !info.blank_line_before_trailer) fprintf(outfile, "\n"); @@ -1112,7 +1110,7 @@ void process_trailers(const char *file, /* Print the lines after the trailers as is */ if (!opts->only_trailers) - fwrite(sb.buf + trailer_end, 1, sb.len - trailer_end, outfile); + fwrite(sb.buf + info.trailer_block_end, 1, sb.len - info.trailer_block_end, outfile); if (opts->in_place) if (rename_tempfile(&trailers_tempfile, file)) @@ -1124,7 +1122,7 @@ void process_trailers(const char *file, void trailer_info_get(struct trailer_info *info, const char *str, const struct process_trailer_options *opts) { - int end_of_log_message, trailer_start; + size_t end_of_log_message = 0, trailer_block_start = 0; struct strbuf **trailer_lines, **ptr; char **trailer_strings = NULL; size_t nr = 0, alloc = 0; @@ -1133,10 +1131,10 @@ void trailer_info_get(struct trailer_info *info, const char *str, ensure_configured(); end_of_log_message = find_end_of_log_message(str, opts->no_divider); - trailer_start = find_trailer_start(str, end_of_log_message); + trailer_block_start = find_trailer_block_start(str, end_of_log_message); - trailer_lines = strbuf_split_buf(str + trailer_start, - end_of_log_message - trailer_start, + trailer_lines = strbuf_split_buf(str + trailer_block_start, + end_of_log_message - trailer_block_start, '\n', 0); for (ptr = trailer_lines; *ptr; ptr++) { @@ -1157,9 +1155,9 @@ void trailer_info_get(struct trailer_info *info, const char *str, strbuf_list_free(trailer_lines); info->blank_line_before_trailer = ends_with_blank_line(str, - trailer_start); - info->trailer_start = str + trailer_start; - info->trailer_end = str + end_of_log_message; + trailer_block_start); + info->trailer_block_start = trailer_block_start; + info->trailer_block_end = end_of_log_message; info->trailers = trailer_strings; info->trailer_nr = nr; } @@ -1174,6 +1172,7 @@ void trailer_info_release(struct trailer_info *info) static void format_trailer_info(struct strbuf *out, const struct trailer_info *info, + const char *msg, const struct process_trailer_options *opts) { size_t origlen = out->len; @@ -1183,8 +1182,8 @@ static void format_trailer_info(struct strbuf *out, if (!opts->only_trailers && !opts->unfold && !opts->filter && !opts->separator && !opts->key_only && !opts->value_only && !opts->key_value_separator) { - strbuf_add(out, info->trailer_start, - info->trailer_end - info->trailer_start); + strbuf_add(out, msg + info->trailer_block_start, + info->trailer_block_end - info->trailer_block_start); return; } @@ -1238,7 +1237,7 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg, struct trailer_info info; trailer_info_get(&info, msg, opts); - format_trailer_info(out, &info, opts); + format_trailer_info(out, &info, msg, opts); trailer_info_release(&info); } diff --git a/trailer.h b/trailer.h index ab2cd017567b3f..1644cd05f60d9f 100644 --- a/trailer.h +++ b/trailer.h @@ -32,16 +32,16 @@ int trailer_set_if_missing(enum trailer_if_missing *item, const char *value); struct trailer_info { /* * True if there is a blank line before the location pointed to by - * trailer_start. + * trailer_block_start. */ int blank_line_before_trailer; /* - * Pointers to the start and end of the trailer block found. If there - * is no trailer block found, these 2 pointers point to the end of the - * input string. + * Offsets to the trailer block start and end positions in the input + * string. If no trailer block is found, these are both set to the + * "true" end of the input (find_end_of_log_message()). */ - const char *trailer_start, *trailer_end; + size_t trailer_block_start, trailer_block_end; /* * Array of trailers found.