From d87e85bd3274e711b19929cd7265aedd24bf758f Mon Sep 17 00:00:00 2001
From: Shubham Kanodia
Date: Mon, 2 Sep 2024 17:12:13 +0530
Subject: [PATCH] maintenance: prefetch config for remotes/refs

Large repositories often contain numerous branches and refs, many of
which individual users may not need. This commit introduces a new
configuration option (`maintenance.prefetch.<remote>.refs`) to allow
users to specify which remotes and refs should be prefetched during
the maintenance task.

Key behaviors:
1. If no configuration is set, all remotes and refs are prefetched
   (preserving the current behavior).
2. If any configuration is set, only the specified remotes and refs
   are prefetched.
3. Remotes without configuration are skipped if any configuration
   exists.

This change allows users to optimize their prefetch operations,
potentially reducing network traffic and disk usage, especially for
large repositories with many branches.

Signed-off-by: Shubham Kanodia
---
 Documentation/config/maintenance.txt |  18 ++++
 builtin/gc.c                         | 135 ++++++++++++++++++++++++++-
 t/t7900-maintenance.sh               |  77 +++++++++-------
 3 files changed, 193 insertions(+), 37 deletions(-)

diff --git a/Documentation/config/maintenance.txt b/Documentation/config/maintenance.txt
index 72a9d6cf816928..3806c93db7abd5 100644
--- a/Documentation/config/maintenance.txt
+++ b/Documentation/config/maintenance.txt
@@ -69,3 +69,21 @@ maintenance.incremental-repack.auto::
 	Otherwise, a positive value implies the command should run when the
 	number of pack-files not in the multi-pack-index is at least the value
 	of `maintenance.incremental-repack.auto`. The default value is 10.
+
+maintenance.prefetch.<remote>.refs::
+	This multi-valued config option specifies which refs to prefetch
+	for each remote during the prefetch maintenance task. Each value
+	of this option is a refspec source that will be used when fetching from
+	the specified remote. This is useful for large active repositories where
+	fetching all refs and remotes might not be very efficient.
++
+For example, to prefetch only the master branch from the origin remote,
+and all branches from the upstream remote, you would use:
++
+----
+$ git config maintenance.prefetch.origin.refs refs/heads/master
+$ git config maintenance.prefetch.upstream.refs 'refs/heads/*'
+----
++
+If this option is set for at least one remote, remotes without it are
+skipped. If it is not set at all, all refs of all remotes are fetched.
diff --git a/builtin/gc.c b/builtin/gc.c
index 427faf1cfe1bdb..617f4fbf8a9e8a 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -17,6 +17,7 @@
 #include "hex.h"
 #include "repository.h"
 #include "config.h"
+#include "string-list.h"
 #include "tempfile.h"
 #include "lockfile.h"
 #include "parse-options.h"
@@ -880,6 +881,26 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+/*
+ * Prefetch restrictions collected from the
+ * `maintenance.prefetch.<remote>.refs` configuration, one entry per remote.
+ */
+struct maintenance_config {
+	struct prefetch_config_list {
+		struct prefetch_config {
+			char *remote;
+			struct string_list refs;
+		} *items;
+		size_t nr, alloc;
+	} prefetch;
+};
+
+#define MAINTENANCE_CONFIG_INIT { \
+	.prefetch = { NULL, 0, 0 }, \
+}
+
+static struct maintenance_config maintenance_cfg = MAINTENANCE_CONFIG_INIT;
+
 static const char *const builtin_maintenance_run_usage[] = {
 	N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"),
 	NULL
@@ -1023,22 +1044,126 @@ static int fetch_remote(struct remote *remote, void *cbdata)
 {
 	struct maintenance_run_opts *opts = cbdata;
 	struct child_process child = CHILD_PROCESS_INIT;
+	struct prefetch_config *prefetch_cfg = NULL;
+	size_t i;
 
 	if (remote->skip_default_update)
 		return 0;
 
+	/*
+	 * Once `maintenance.prefetch.<remote>.refs` is set for any remote,
+	 * remotes without such an entry are not prefetched at all.
+	 */
+	for (i = 0; i < maintenance_cfg.prefetch.nr; i++) {
+		struct prefetch_config *pc = &maintenance_cfg.prefetch.items[i];
+
+		if (!strcmp(remote->name, pc->remote)) {
+			prefetch_cfg = pc;
+			break;
+		}
+	}
+	if (maintenance_cfg.prefetch.nr && !prefetch_cfg)
+		return 0;
+
 	child.git_cmd = 1;
 	strvec_pushl(&child.args, "fetch", remote->name,
 		     "--prefetch", "--prune", "--no-tags",
 		     "--no-write-fetch-head", "--recurse-submodules=no",
 		     NULL);
 
 	if (opts->quiet)
 		strvec_push(&child.args, "--quiet");
 
+	/* Restrict the fetch to the configured refs, if any were given. */
+	if (prefetch_cfg) {
+		struct string_list_item *item;
+
+		for_each_string_list_item(item, &prefetch_cfg->refs)
+			strvec_pushf(&child.args, "%s:%s",
+				     item->string, item->string);
+	}
+
 	return !!run_command(&child);
 }
 
+/*
+ * Return the entry for the remote named by the first `len` bytes of
+ * `remote`, appending a new entry with an empty ref list if none exists.
+ */
+static struct prefetch_config *
+find_or_add_prefetch_config(struct maintenance_config *config,
+			    const char *remote, size_t len)
+{
+	struct prefetch_config *pc;
+	size_t i;
+
+	for (i = 0; i < config->prefetch.nr; i++) {
+		pc = &config->prefetch.items[i];
+		if (strlen(pc->remote) == len &&
+		    !memcmp(pc->remote, remote, len))
+			return pc;
+	}
+
+	ALLOC_GROW(config->prefetch.items, config->prefetch.nr + 1,
+		   config->prefetch.alloc);
+	pc = &config->prefetch.items[config->prefetch.nr++];
+	pc->remote = xmemdupz(remote, len);
+	string_list_init_dup(&pc->refs);
+	return pc;
+}
+
+/*
+ * Config callback for `maintenance.prefetch.<remote>.refs`. The key may be
+ * given several times per remote; the space-separated refs of every
+ * occurrence are accumulated in that remote's entry.
+ */
+static int maintenance_config_callback(const char *key, const char *value,
+				       const struct config_context *ctx UNUSED,
+				       void *data)
+{
+	struct maintenance_config *config = data;
+	const char *remote_name;
+	const char *refs_key;
+	struct prefetch_config *pc;
+
+	if (!skip_prefix(key, "maintenance.prefetch.", &remote_name))
+		return 0;
+
+	/* The remote name may contain dots, so split at the last one. */
+	refs_key = strrchr(remote_name, '.');
+	if (!refs_key || refs_key == remote_name ||
+	    strcmp(refs_key + 1, "refs"))
+		return 0;
+	if (!value)
+		return config_error_nonbool(key);
+
+	pc = find_or_add_prefetch_config(config, remote_name,
+					 refs_key - remote_name);
+	string_list_split(&pc->refs, value, ' ', -1);
+
+	return 0;
+}
+
+/* Collect the prefetch settings from the repository configuration. */
+static void maintenance_config_read(struct maintenance_config *config)
+{
+	git_config(maintenance_config_callback, config);
+}
+
+/* Free everything owned by `config` and reset it to all-zero. */
+static void maintenance_config_release(struct maintenance_config *config)
+{
+	size_t i;
+
+	for (i = 0; i < config->prefetch.nr; i++) {
+		free(config->prefetch.items[i].remote);
+		string_list_clear(&config->prefetch.items[i].refs, 0);
+	}
+
+	free(config->prefetch.items);
+	memset(config, 0, sizeof(*config));
+}
+
 static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
 				     struct gc_config *cfg)
 {
@@ -1563,7 +1688,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
 {
 	int i;
 	struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
-	struct gc_config cfg = GC_CONFIG_INIT;
+	struct gc_config gc_cfg = GC_CONFIG_INIT;
 	struct option builtin_maintenance_run_options[] = {
 		OPT_BOOL(0, "auto", &opts.auto_flag,
 			 N_("run tasks based on the state of the repository")),
@@ -1594,15 +1719,17 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
 	if (opts.auto_flag && opts.schedule)
 		die(_("use at most one of --auto and --schedule=<frequency>"));
 
-	gc_config(&cfg);
+	gc_config(&gc_cfg);
+	maintenance_config_read(&maintenance_cfg);
 	initialize_task_config(opts.schedule);
 
 	if (argc != 0)
 		usage_with_options(builtin_maintenance_run_usage,
 				   builtin_maintenance_run_options);
 
-	ret = maintenance_run_tasks(&opts, &cfg);
-	gc_config_release(&cfg);
+	ret = maintenance_run_tasks(&opts, &gc_cfg);
+	gc_config_release(&gc_cfg);
+	maintenance_config_release(&maintenance_cfg);
 	return ret;
 }
 
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546f66..9206b7e5ed6330 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -223,6 +223,7 @@ test_expect_success 'prefetch multiple remotes' ' git 
-C clone2 switch -c two && test_commit -C clone1 one && test_commit -C clone2 two && + GIT_TRACE2_EVENT="$(pwd)/run-prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && test_subcommand git fetch remote1 $fetchargs .git/objects/maintenance.lock && + GIT_TRACE2_EVENT="$(pwd)/run-prefetch-default.txt" git maintenance run --task=prefetch 2>/dev/null && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + test_subcommand git fetch remote1 $fetchargs actual && + test_must_be_empty actual && + git log prefetch/remotes/remote1/one && + git log prefetch/remotes/remote2/two && - ls .git/objects >obj-dir-before && - test_file_not_empty obj-dir-before && - ls .git/objects/pack/*.pack >packs-before && - test_line_count = 1 packs-before && + git fetch --all && + test_cmp_rev refs/remotes/remote1/one refs/prefetch/remotes/remote1/one && + test_cmp_rev refs/remotes/remote2/two refs/prefetch/remotes/remote2/two +' - # The first run creates a pack-file - # but does not delete loose objects. - git maintenance run --task=loose-objects && - ls .git/objects >obj-dir-between && - test_cmp obj-dir-before obj-dir-between && - ls .git/objects/pack/*.pack >packs-between && - test_line_count = 2 packs-between && - ls .git/objects/pack/loose-*.pack >loose-packs && - test_line_count = 1 loose-packs && - - # The second run deletes loose objects - # but does not create a pack-file. - git maintenance run --task=loose-objects && - ls .git/objects >obj-dir-after && - cat >expect <<-\EOF && - info - pack - EOF - test_cmp expect obj-dir-after && - ls .git/objects/pack/*.pack >packs-after && - test_cmp packs-between packs-after +test_expect_success 'prefetch with configurable remotes' ' + git clone . clone1 && + git clone . 
clone2 && + git remote add remote1 "file://$(pwd)/clone1" && + git remote add remote2 "file://$(pwd)/clone2" && + git -C clone1 switch -c one && + git -C clone2 switch -c two && + test_commit -C clone1 one && + test_commit -C clone2 two && + + git config maintenance.prefetch.remote1.refs "refs/heads/one" && + + GIT_TRACE2_EVENT="$(pwd)/run-prefetch-config.txt" git maintenance run --task=prefetch 2>/dev/null && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + cat run-prefetch-config.txt && + test_subcommand git fetch remote1 $fetchargs refs/heads/one:refs/heads/one actual && + test_must_be_empty actual && + git log prefetch/remotes/remote1/one && + test_must_fail git log prefetch/remotes/remote2/two ' test_expect_success 'maintenance.loose-objects.auto' '