Skip to content

Commit

Permalink
maintenance: prefetch config for remotes/refs
Browse files Browse the repository at this point in the history
Large repositories often contain numerous branches and refs, many of which individual users may not need.

This commit introduces a new configuration option (`maintenance.prefetch.<remote>.refs`) to allow users to specify which remotes and refs should be prefetched during the maintenance task.

Key behaviors:
1. If no configuration is set, all remotes and refs are prefetched (preserving the current behavior).
2. If any configuration is set, only the specified remotes and refs are prefetched.
3. Remotes without configuration are skipped if any configuration exists.

This change allows users to optimize their prefetch operations, potentially reducing network traffic and disk usage, especially for large repositories with many branches.

Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
  • Loading branch information
pastelsky committed Sep 2, 2024
1 parent 4590f2e commit d87e85b
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 41 deletions.
18 changes: 18 additions & 0 deletions Documentation/config/maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,21 @@ maintenance.incremental-repack.auto::
Otherwise, a positive value implies the command should run when the
number of pack-files not in the multi-pack-index is at least the value
of `maintenance.incremental-repack.auto`. The default value is 10.

maintenance.prefetch.<remote>.refs::
This multi-valued config option specifies which refs to prefetch
for each remote during the prefetch maintenance task. Each value
of this option is a refspec source that will be used when fetching from
the specified remote. This is useful for large active repositories where
fetching all refs and remotes might not be very efficient.
+
For example, to prefetch only the master branch from the origin remote,
and all branches from the upstream remote, you would use:
+
----
$ git config maintenance.prefetch.origin.refs refs/heads/master
$ git config maintenance.prefetch.upstream.refs refs/heads/*
----
+
If this option is not set for a remote, the prefetch task will use
the default behavior of fetching all refs from all remotes.
113 changes: 105 additions & 8 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "hex.h"
#include "repository.h"
#include "config.h"
#include "string-list.h"
#include "tempfile.h"
#include "lockfile.h"
#include "parse-options.h"
Expand Down Expand Up @@ -246,6 +247,7 @@ struct maintenance_run_opts {
int quiet;
enum schedule_priority schedule;
};

#define MAINTENANCE_RUN_OPTS_INIT { \
.detach = -1, \
}
Expand Down Expand Up @@ -880,6 +882,22 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
return 0;
}

struct maintenance_config {
struct prefetch_config_list {
struct prefetch_config {
char *remote;
struct string_list refs;
} *items;
int nr, alloc;
} prefetch;
};

#define MAINTENANCE_CONFIG_INIT { \
.prefetch = { NULL, 0, 0 }, \
}

static struct maintenance_config maintenance_cfg = MAINTENANCE_CONFIG_INIT;

static const char *const builtin_maintenance_run_usage[] = {
N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"),
NULL
Expand Down Expand Up @@ -1023,22 +1041,94 @@ static int fetch_remote(struct remote *remote, void *cbdata)
{
struct maintenance_run_opts *opts = cbdata;
struct child_process child = CHILD_PROCESS_INIT;
struct prefetch_config *prefetch_cfg = NULL;
static int has_prefetch_cfg = -1; // -1: unknown, 0: no config, 1: config exists

if (remote->skip_default_update)
return 0;

if (has_prefetch_cfg == -1)
has_prefetch_cfg = (maintenance_cfg.prefetch.nr > 0);

if (has_prefetch_cfg) {
for (int i = 0; i < maintenance_cfg.prefetch.nr; i++) {
if (!strcmp(remote->name, maintenance_cfg.prefetch.items[i].remote)) {
prefetch_cfg = &maintenance_cfg.prefetch.items[i];
break;
}
}

if (!prefetch_cfg)
return 0;
}

child.git_cmd = 1;
strvec_pushl(&child.args, "fetch", remote->name,
"--prefetch", "--prune", "--no-tags",
"--no-write-fetch-head", "--recurse-submodules=no",
NULL);
strvec_pushl(&child.args, "fetch", remote->name, "--prefetch", "--prune", "--no-tags",
"--no-write-fetch-head", "--recurse-submodules=no", NULL);

if (opts->quiet)
strvec_push(&child.args, "--quiet");

if (prefetch_cfg && prefetch_cfg->refs.nr > 0) {
struct string_list_item *item;
for_each_string_list_item(item, &prefetch_cfg->refs)
strvec_pushf(&child.args, "%s:%s", item->string, item->string);
}

return !!run_command(&child);
}

static int maintenance_config_callback(const char *key, const char *value,
const struct config_context *ctx,
void *data)
{
struct maintenance_config *config = data;
const char *remote_name;
const char *refs_key;
struct prefetch_config *pc;
struct strbuf name = STRBUF_INIT;

if (!skip_prefix(key, "maintenance.prefetch.", &remote_name))
return 0;

refs_key = strrchr(remote_name, '.');
if (!refs_key || strcmp(refs_key + 1, "refs"))
return 0;

strbuf_add(&name, remote_name, refs_key - remote_name);

REALLOC_ARRAY(config->prefetch.items, config->prefetch.nr + 1);
pc = &config->prefetch.items[config->prefetch.nr++];
pc->remote = strbuf_detach(&name, NULL);
string_list_init_dup(&pc->refs);
pc->refs.strdup_strings = 1;
string_list_split(&pc->refs, value, ' ', -1);

return 0;
}

static void maintenance_config_read(struct maintenance_config *config)
{
if (git_config(maintenance_config_callback, config) < 0)

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / win build

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux32 (daald/ubuntu32:xenial)

builtin/gc.c:1112:6: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-musl (alpine)

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / pedantic (fedora)

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-asan-ubsan (ubuntu-latest)

builtin/gc.c:1112:54: invalid operands to binary expression ('void' and 'int')

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-gcc (ubuntu-20.04)

builtin/gc.c:1112:6: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-gcc-default (ubuntu-latest)

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-leaks (ubuntu-latest)

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-reftable-leaks (ubuntu-latest)

builtin/gc.c:1112:13: void value not ignored as it ought to be

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-reftable (ubuntu-latest)

builtin/gc.c:1112:54: invalid operands to binary expression ('void' and 'int')

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-sha256 (ubuntu-latest)

builtin/gc.c:1112:54: invalid operands to binary expression ('void' and 'int')

Check failure on line 1112 in builtin/gc.c

View workflow job for this annotation

GitHub Actions / linux-TEST-vars (ubuntu-20.04)

builtin/gc.c:1112:6: void value not ignored as it ought to be
die(_("failed to read maintenance configuration"));
}

static void maintenance_config_release(struct maintenance_config *config)
{
int i;

if (!config->prefetch.items)
return;

for (i = 0; i < config->prefetch.nr; i++) {
free(config->prefetch.items[i].remote);
string_list_clear(&config->prefetch.items[i].refs, 1);
}

free(config->prefetch.items);
memset(config, 0, sizeof(*config));
}

static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
struct gc_config *cfg)
{
Expand Down Expand Up @@ -1563,7 +1653,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
{
int i;
struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
struct gc_config cfg = GC_CONFIG_INIT;
struct gc_config gc_cfg = GC_CONFIG_INIT;
struct option builtin_maintenance_run_options[] = {
OPT_BOOL(0, "auto", &opts.auto_flag,
N_("run tasks based on the state of the repository")),
Expand All @@ -1579,8 +1669,11 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
PARSE_OPT_NONEG, task_option_parse),
OPT_END()
};

int ret;

maintenance_config_read(&maintenance_cfg);

opts.quiet = !isatty(2);

for (i = 0; i < TASK__COUNT; i++)
Expand All @@ -1591,18 +1684,22 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
builtin_maintenance_run_usage,
PARSE_OPT_STOP_AT_NON_OPTION);


maintenance_config_read(&maintenance_cfg);

if (opts.auto_flag && opts.schedule)
die(_("use at most one of --auto and --schedule=<frequency>"));

gc_config(&cfg);
gc_config(&gc_cfg);
initialize_task_config(opts.schedule);

if (argc != 0)
usage_with_options(builtin_maintenance_run_usage,
builtin_maintenance_run_options);

ret = maintenance_run_tasks(&opts, &cfg);
gc_config_release(&cfg);
ret = maintenance_run_tasks(&opts, &gc_cfg);
gc_config_release(&gc_cfg);
maintenance_config_release(&maintenance_cfg);
return ret;
}

Expand Down
77 changes: 44 additions & 33 deletions t/t7900-maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ test_expect_success 'prefetch multiple remotes' '
git -C clone2 switch -c two &&
test_commit -C clone1 one &&
test_commit -C clone2 two &&
GIT_TRACE2_EVENT="$(pwd)/run-prefetch.txt" git maintenance run --task=prefetch 2>/dev/null &&
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
test_subcommand git fetch remote1 $fetchargs <run-prefetch.txt &&
Expand All @@ -245,43 +246,53 @@ test_expect_success 'prefetch multiple remotes' '
test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt
'

test_expect_success 'loose-objects task' '
# Repack everything so we know the state of the object dir
git repack -adk &&
test_expect_success 'prefetch with default behavior (all remotes)' '
git clone . clone1 &&
git clone . clone2 &&
git remote add remote1 "file://$(pwd)/clone1" &&
git remote add remote2 "file://$(pwd)/clone2" &&
git -C clone1 switch -c one &&
git -C clone2 switch -c two &&
test_commit -C clone1 one &&
test_commit -C clone2 two &&
# Hack to stop maintenance from running during "git commit"
echo in use >.git/objects/maintenance.lock &&
GIT_TRACE2_EVENT="$(pwd)/run-prefetch-default.txt" git maintenance run --task=prefetch 2>/dev/null &&
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
test_subcommand git fetch remote1 $fetchargs <run-prefetch-default.txt &&
test_subcommand git fetch remote2 $fetchargs <run-prefetch-default.txt &&
# Assuming that "git commit" creates at least one loose object
test_commit create-loose-object &&
rm .git/objects/maintenance.lock &&
git for-each-ref refs/remotes >actual &&
test_must_be_empty actual &&
git log prefetch/remotes/remote1/one &&
git log prefetch/remotes/remote2/two &&
ls .git/objects >obj-dir-before &&
test_file_not_empty obj-dir-before &&
ls .git/objects/pack/*.pack >packs-before &&
test_line_count = 1 packs-before &&
git fetch --all &&
test_cmp_rev refs/remotes/remote1/one refs/prefetch/remotes/remote1/one &&
test_cmp_rev refs/remotes/remote2/two refs/prefetch/remotes/remote2/two
'

# The first run creates a pack-file
# but does not delete loose objects.
git maintenance run --task=loose-objects &&
ls .git/objects >obj-dir-between &&
test_cmp obj-dir-before obj-dir-between &&
ls .git/objects/pack/*.pack >packs-between &&
test_line_count = 2 packs-between &&
ls .git/objects/pack/loose-*.pack >loose-packs &&
test_line_count = 1 loose-packs &&
# The second run deletes loose objects
# but does not create a pack-file.
git maintenance run --task=loose-objects &&
ls .git/objects >obj-dir-after &&
cat >expect <<-\EOF &&
info
pack
EOF
test_cmp expect obj-dir-after &&
ls .git/objects/pack/*.pack >packs-after &&
test_cmp packs-between packs-after
test_expect_success 'prefetch with configurable remotes' '
git clone . clone1 &&
git clone . clone2 &&
git remote add remote1 "file://$(pwd)/clone1" &&
git remote add remote2 "file://$(pwd)/clone2" &&
git -C clone1 switch -c one &&
git -C clone2 switch -c two &&
test_commit -C clone1 one &&
test_commit -C clone2 two &&
git config maintenance.prefetch.remote1.refs "refs/heads/one" &&
GIT_TRACE2_EVENT="$(pwd)/run-prefetch-config.txt" git maintenance run --task=prefetch 2>/dev/null &&
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
cat run-prefetch-config.txt &&
test_subcommand git fetch remote1 $fetchargs refs/heads/one:refs/heads/one <run-prefetch-config.txt &&
test_subcommand ! git fetch remote2 $fetchargs <run-prefetch-config.txt &&
git for-each-ref refs/remotes >actual &&
test_must_be_empty actual &&
git log prefetch/remotes/remote1/one &&
test_must_fail git log prefetch/remotes/remote2/two
'

test_expect_success 'maintenance.loose-objects.auto' '
Expand Down

0 comments on commit d87e85b

Please sign in to comment.