Skip to content

Commit 8d39910

Browse files
committed
maintenance: prefetch config for remotes/refs
Large repositories often contain numerous branches and refs, many of which individual users may not need. This commit introduces a new configuration option (`maintenance.prefetch.<remote>.refs`) to allow users to specify which remotes and refs should be prefetched during the maintenance task. Key behaviors: 1. If no configuration is set, all remotes and refs are prefetched (preserving the current behavior). 2. If any configuration is set, only the specified remotes and refs are prefetched. 3. Remotes without configuration are skipped if any configuration exists. This change allows users to optimize their prefetch operations, potentially reducing network traffic and disk usage, especially for large repositories with many branches. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
1 parent 4590f2e commit 8d39910

File tree

3 files changed

+167
-41
lines changed

3 files changed

+167
-41
lines changed

Documentation/config/maintenance.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,21 @@ maintenance.incremental-repack.auto::
6969
Otherwise, a positive value implies the command should run when the
7070
number of pack-files not in the multi-pack-index is at least the value
7171
of `maintenance.incremental-repack.auto`. The default value is 10.
72+
73+
maintenance.prefetch.<remote>.refs::
74+
This multi-valued config option specifies which refs to prefetch
75+
for each remote during the prefetch maintenance task. Each value
76+
of this option is a refspec source that will be used when fetching from
77+
the specified remote. This is useful for large active repositories where
78+
fetching all refs and remotes might not be very efficient.
79+
+
80+
For example, to prefetch only the master branch from the origin remote,
81+
and all branches from the upstream remote, you would use:
82+
+
83+
----
84+
$ git config maintenance.prefetch.origin.refs refs/heads/master
85+
$ git config maintenance.prefetch.upstream.refs refs/heads/*
86+
----
87+
+
88+
If this option is not set for any remote, the prefetch task uses the
89+
default behavior of fetching all refs from all remotes. If it is set for
at least one remote, remotes without this option are skipped.

builtin/gc.c

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "hex.h"
1818
#include "repository.h"
1919
#include "config.h"
20+
#include "string-list.h"
2021
#include "tempfile.h"
2122
#include "lockfile.h"
2223
#include "parse-options.h"
@@ -246,6 +247,7 @@ struct maintenance_run_opts {
246247
int quiet;
247248
enum schedule_priority schedule;
248249
};
250+
249251
#define MAINTENANCE_RUN_OPTS_INIT { \
250252
.detach = -1, \
251253
}
@@ -880,6 +882,22 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
880882
return 0;
881883
}
882884

885+
struct maintenance_config {
886+
struct prefetch_config_list {
887+
struct prefetch_config {
888+
char *remote;
889+
struct string_list refs;
890+
} *items;
891+
int nr, alloc;
892+
} prefetch;
893+
};
894+
895+
#define MAINTENANCE_CONFIG_INIT { \
896+
.prefetch = { NULL, 0, 0 }, \
897+
}
898+
899+
static struct maintenance_config maintenance_cfg = MAINTENANCE_CONFIG_INIT;
900+
883901
static const char *const builtin_maintenance_run_usage[] = {
884902
N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"),
885903
NULL
@@ -1023,22 +1041,94 @@ static int fetch_remote(struct remote *remote, void *cbdata)
10231041
{
10241042
struct maintenance_run_opts *opts = cbdata;
10251043
struct child_process child = CHILD_PROCESS_INIT;
1044+
struct prefetch_config *prefetch_cfg = NULL;
1045+
static int has_prefetch_cfg = -1; // -1: unknown, 0: no config, 1: config exists
10261046

10271047
if (remote->skip_default_update)
10281048
return 0;
10291049

1050+
if (has_prefetch_cfg == -1)
1051+
has_prefetch_cfg = (maintenance_cfg.prefetch.nr > 0);
1052+
1053+
if (has_prefetch_cfg) {
1054+
for (int i = 0; i < maintenance_cfg.prefetch.nr; i++) {
1055+
if (!strcmp(remote->name, maintenance_cfg.prefetch.items[i].remote)) {
1056+
prefetch_cfg = &maintenance_cfg.prefetch.items[i];
1057+
break;
1058+
}
1059+
}
1060+
1061+
if (!prefetch_cfg)
1062+
return 0;
1063+
}
1064+
10301065
child.git_cmd = 1;
1031-
strvec_pushl(&child.args, "fetch", remote->name,
1032-
"--prefetch", "--prune", "--no-tags",
1033-
"--no-write-fetch-head", "--recurse-submodules=no",
1034-
NULL);
1066+
strvec_pushl(&child.args, "fetch", remote->name, "--prefetch", "--prune", "--no-tags",
1067+
"--no-write-fetch-head", "--recurse-submodules=no", NULL);
10351068

10361069
if (opts->quiet)
10371070
strvec_push(&child.args, "--quiet");
10381071

1072+
if (prefetch_cfg && prefetch_cfg->refs.nr > 0) {
1073+
struct string_list_item *item;
1074+
for_each_string_list_item(item, &prefetch_cfg->refs)
1075+
strvec_pushf(&child.args, "%s:%s", item->string, item->string);
1076+
}
1077+
10391078
return !!run_command(&child);
10401079
}
10411080

1081+
static int maintenance_config_callback(const char *key, const char *value,
1082+
const struct config_context *ctx,
1083+
void *data)
1084+
{
1085+
struct maintenance_config *config = data;
1086+
const char *remote_name;
1087+
const char *refs_key;
1088+
struct prefetch_config *pc;
1089+
struct strbuf name = STRBUF_INIT;
1090+
1091+
if (!skip_prefix(key, "maintenance.prefetch.", &remote_name))
1092+
return 0;
1093+
1094+
refs_key = strrchr(remote_name, '.');
1095+
if (!refs_key || strcmp(refs_key + 1, "refs"))
1096+
return 0;
1097+
1098+
strbuf_add(&name, remote_name, refs_key - remote_name);
1099+
1100+
REALLOC_ARRAY(config->prefetch.items, config->prefetch.nr + 1);
1101+
pc = &config->prefetch.items[config->prefetch.nr++];
1102+
pc->remote = strbuf_detach(&name, NULL);
1103+
string_list_init_dup(&pc->refs);
1104+
pc->refs.strdup_strings = 1;
1105+
string_list_split(&pc->refs, value, ' ', -1);
1106+
1107+
return 0;
1108+
}
1109+
1110+
static void maintenance_config_read(struct maintenance_config *config)
1111+
{
1112+
if (git_config(maintenance_config_callback, config) < 0)
1113+
die(_("failed to read maintenance configuration"));
1114+
}
1115+
1116+
static void maintenance_config_release(struct maintenance_config *config)
1117+
{
1118+
int i;
1119+
1120+
if (!config->prefetch.items)
1121+
return;
1122+
1123+
for (i = 0; i < config->prefetch.nr; i++) {
1124+
free(config->prefetch.items[i].remote);
1125+
string_list_clear(&config->prefetch.items[i].refs, 1);
1126+
}
1127+
1128+
free(config->prefetch.items);
1129+
memset(config, 0, sizeof(*config));
1130+
}
1131+
10421132
static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
10431133
struct gc_config *cfg)
10441134
{
@@ -1563,7 +1653,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
15631653
{
15641654
int i;
15651655
struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
1566-
struct gc_config cfg = GC_CONFIG_INIT;
1656+
struct gc_config gc_cfg = GC_CONFIG_INIT;
15671657
struct option builtin_maintenance_run_options[] = {
15681658
OPT_BOOL(0, "auto", &opts.auto_flag,
15691659
N_("run tasks based on the state of the repository")),
@@ -1579,8 +1669,11 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
15791669
PARSE_OPT_NONEG, task_option_parse),
15801670
OPT_END()
15811671
};
1672+
15821673
int ret;
15831674

1675+
maintenance_config_read(&maintenance_cfg);
1676+
15841677
opts.quiet = !isatty(2);
15851678

15861679
for (i = 0; i < TASK__COUNT; i++)
@@ -1591,18 +1684,22 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
15911684
builtin_maintenance_run_usage,
15921685
PARSE_OPT_STOP_AT_NON_OPTION);
15931686

1687+
1688+
maintenance_config_read(&maintenance_cfg);
1689+
15941690
if (opts.auto_flag && opts.schedule)
15951691
die(_("use at most one of --auto and --schedule=<frequency>"));
15961692

1597-
gc_config(&cfg);
1693+
gc_config(&gc_cfg);
15981694
initialize_task_config(opts.schedule);
15991695

16001696
if (argc != 0)
16011697
usage_with_options(builtin_maintenance_run_usage,
16021698
builtin_maintenance_run_options);
16031699

1604-
ret = maintenance_run_tasks(&opts, &cfg);
1605-
gc_config_release(&cfg);
1700+
ret = maintenance_run_tasks(&opts, &gc_cfg);
1701+
gc_config_release(&gc_cfg);
1702+
maintenance_config_release(&maintenance_cfg);
16061703
return ret;
16071704
}
16081705

t/t7900-maintenance.sh

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ test_expect_success 'prefetch multiple remotes' '
223223
git -C clone2 switch -c two &&
224224
test_commit -C clone1 one &&
225225
test_commit -C clone2 two &&
226+
226227
GIT_TRACE2_EVENT="$(pwd)/run-prefetch.txt" git maintenance run --task=prefetch 2>/dev/null &&
227228
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
228229
test_subcommand git fetch remote1 $fetchargs <run-prefetch.txt &&
@@ -245,43 +246,53 @@ test_expect_success 'prefetch multiple remotes' '
245246
test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt
246247
'
247248

248-
test_expect_success 'loose-objects task' '
249-
# Repack everything so we know the state of the object dir
250-
git repack -adk &&
249+
test_expect_success 'prefetch with default behavior (all remotes)' '
250+
git clone . clone1 &&
251+
git clone . clone2 &&
252+
git remote add remote1 "file://$(pwd)/clone1" &&
253+
git remote add remote2 "file://$(pwd)/clone2" &&
254+
git -C clone1 switch -c one &&
255+
git -C clone2 switch -c two &&
256+
test_commit -C clone1 one &&
257+
test_commit -C clone2 two &&
251258
252-
# Hack to stop maintenance from running during "git commit"
253-
echo in use >.git/objects/maintenance.lock &&
259+
GIT_TRACE2_EVENT="$(pwd)/run-prefetch-default.txt" git maintenance run --task=prefetch 2>/dev/null &&
260+
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
261+
test_subcommand git fetch remote1 $fetchargs <run-prefetch-default.txt &&
262+
test_subcommand git fetch remote2 $fetchargs <run-prefetch-default.txt &&
254263
255-
# Assuming that "git commit" creates at least one loose object
256-
test_commit create-loose-object &&
257-
rm .git/objects/maintenance.lock &&
264+
git for-each-ref refs/remotes >actual &&
265+
test_must_be_empty actual &&
266+
git log prefetch/remotes/remote1/one &&
267+
git log prefetch/remotes/remote2/two &&
258268
259-
ls .git/objects >obj-dir-before &&
260-
test_file_not_empty obj-dir-before &&
261-
ls .git/objects/pack/*.pack >packs-before &&
262-
test_line_count = 1 packs-before &&
269+
git fetch --all &&
270+
test_cmp_rev refs/remotes/remote1/one refs/prefetch/remotes/remote1/one &&
271+
test_cmp_rev refs/remotes/remote2/two refs/prefetch/remotes/remote2/two
272+
'
263273

264-
# The first run creates a pack-file
265-
# but does not delete loose objects.
266-
git maintenance run --task=loose-objects &&
267-
ls .git/objects >obj-dir-between &&
268-
test_cmp obj-dir-before obj-dir-between &&
269-
ls .git/objects/pack/*.pack >packs-between &&
270-
test_line_count = 2 packs-between &&
271-
ls .git/objects/pack/loose-*.pack >loose-packs &&
272-
test_line_count = 1 loose-packs &&
273-
274-
# The second run deletes loose objects
275-
# but does not create a pack-file.
276-
git maintenance run --task=loose-objects &&
277-
ls .git/objects >obj-dir-after &&
278-
cat >expect <<-\EOF &&
279-
info
280-
pack
281-
EOF
282-
test_cmp expect obj-dir-after &&
283-
ls .git/objects/pack/*.pack >packs-after &&
284-
test_cmp packs-between packs-after
274+
test_expect_success 'prefetch with configurable remotes' '
275+
git clone . clone1 &&
276+
git clone . clone2 &&
277+
git remote add remote1 "file://$(pwd)/clone1" &&
278+
git remote add remote2 "file://$(pwd)/clone2" &&
279+
git -C clone1 switch -c one &&
280+
git -C clone2 switch -c two &&
281+
test_commit -C clone1 one &&
282+
test_commit -C clone2 two &&
283+
284+
git config maintenance.prefetch.remote1.refs "refs/heads/one" &&
285+
286+
GIT_TRACE2_EVENT="$(pwd)/run-prefetch-config.txt" git maintenance run --task=prefetch 2>/dev/null &&
287+
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
288+
cat run-prefetch-config.txt &&
289+
test_subcommand git fetch remote1 $fetchargs refs/heads/one:refs/heads/one <run-prefetch-config.txt &&
290+
test_subcommand ! git fetch remote2 $fetchargs <run-prefetch-config.txt &&
291+
292+
git for-each-ref refs/remotes >actual &&
293+
test_must_be_empty actual &&
294+
git log prefetch/remotes/remote1/one &&
295+
test_must_fail git log prefetch/remotes/remote2/two
285296
'
286297

287298
test_expect_success 'maintenance.loose-objects.auto' '

0 commit comments

Comments
 (0)