Skip to content

Commit f043683

Browse files
Eric Wonggitster
Eric Wong
authored andcommitted
cat-file: use writev(2) if available
Using writev here is 20-40% faster than three write syscalls in succession for smaller (1-10k) objects in the delta base cache. This advantage decreases as object sizes approach pipe size (64k on Linux). writev reduces wakeups and syscalls on the read side as well: each write(2) syscall may trigger one or more corresponding read(2) syscalls in the reader. Attempting atomicity in the writer via writev also reduces the likelyhood of non-blocking readers failing with EAGAIN and having to call poll||select before attempting to read again. Unfortunately, this turns into a small (1-3%) slowdown for gigantic objects of a megabyte or more even with after increasing pipe size to 1MB via the F_SETPIPE_SZ fcntl(2) op. This slowdown is acceptable to me since the vast majority of objects are 64K or less for projects I've looked at. Relying on stdio buffering and fflush(3) after each response was considered for users without --buffer, but historically cat-file defaults to being compatible with non-blocking stdout and able to poll(2) after hitting EAGAIN on write(2). Using stdio on files with the O_NONBLOCK flag is (AFAIK) unspecified and likely subject to portability problems and thus avoided. Signed-off-by: Eric Wong <e@80x24.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 1732dda commit f043683

File tree

8 files changed

+149
-18
lines changed

8 files changed

+149
-18
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,6 +1844,9 @@ ifdef NO_PREAD
18441844
COMPAT_CFLAGS += -DNO_PREAD
18451845
COMPAT_OBJS += compat/pread.o
18461846
endif
1847+
ifdef HAVE_WRITEV
1848+
COMPAT_CFLAGS += -DHAVE_WRITEV
1849+
endif
18471850
ifdef NO_FAST_WORKING_DIRECTORY
18481851
BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
18491852
endif

builtin/cat-file.c

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ struct expand_data {
280280
off_t disk_size;
281281
const char *rest;
282282
struct object_id delta_base_oid;
283-
void *content;
283+
struct git_iovec iov[3];
284284

285285
/*
286286
* If mark_query is true, we do not expand anything, but rather
@@ -378,17 +378,42 @@ static void batch_write(struct batch_options *opt, const void *data, size_t len)
378378
write_or_die(1, data, len);
379379
}
380380

381-
static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
381+
static void batch_writev(struct batch_options *opt, struct expand_data *data,
382+
const struct strbuf *hdr, size_t size)
383+
{
384+
data->iov[0].iov_base = hdr->buf;
385+
data->iov[0].iov_len = hdr->len;
386+
data->iov[1].iov_len = size;
387+
388+
/*
389+
* Copying a (8|16)-byte iovec for a single byte is gross, but my
390+
* attempt to stuff output_delim into the trailing NUL byte of
391+
* iov[1].iov_base (and restoring it after writev(2) for the
392+
* OI_DBCACHED case) to drop iovcnt from 3->2 wasn't faster.
393+
*/
394+
data->iov[2].iov_base = &opt->output_delim;
395+
data->iov[2].iov_len = 1;
396+
397+
if (opt->buffer_output)
398+
fwritev_or_die(stdout, data->iov, 3);
399+
else
400+
writev_or_die(1, data->iov, 3);
401+
402+
/* writev_or_die may move iov[1].iov_base, so it's invalid */
403+
data->iov[1].iov_base = NULL;
404+
}
405+
406+
static void print_object_or_die(struct batch_options *opt,
407+
struct expand_data *data, struct strbuf *hdr)
382408
{
383409
const struct object_id *oid = &data->oid;
384410

385411
assert(data->info.typep);
386412

387-
if (data->content) {
388-
void *content = data->content;
413+
if (data->iov[1].iov_base) {
414+
void *content = data->iov[1].iov_base;
389415
unsigned long size = data->size;
390416

391-
data->content = NULL;
392417
if (use_mailmap && (data->type == OBJ_COMMIT ||
393418
data->type == OBJ_TAG)) {
394419
size_t s = size;
@@ -399,10 +424,10 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
399424
}
400425

401426
content = replace_idents_using_mailmap(content, &s);
427+
data->iov[1].iov_base = content;
402428
size = cast_size_t_to_ulong(s);
403429
}
404-
405-
batch_write(opt, content, size);
430+
batch_writev(opt, data, hdr, size);
406431
switch (data->info.whence) {
407432
case OI_CACHED:
408433
/*
@@ -419,8 +444,6 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
419444
}
420445
} else {
421446
assert(data->type == OBJ_BLOB);
422-
if (opt->buffer_output)
423-
fflush(stdout);
424447
if (opt->transform_mode) {
425448
char *contents;
426449
unsigned long size;
@@ -447,10 +470,15 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
447470
oid_to_hex(oid), data->rest);
448471
} else
449472
BUG("invalid transform_mode: %c", opt->transform_mode);
450-
batch_write(opt, contents, size);
473+
data->iov[1].iov_base = contents;
474+
batch_writev(opt, data, hdr, size);
451475
free(contents);
452476
} else {
477+
batch_write(opt, hdr->buf, hdr->len);
478+
if (opt->buffer_output)
479+
fflush(stdout);
453480
stream_blob(oid);
481+
batch_write(opt, &opt->output_delim, 1);
454482
}
455483
}
456484
}
@@ -519,12 +547,10 @@ static void batch_object_write(const char *obj_name,
519547
strbuf_addch(scratch, opt->output_delim);
520548
}
521549

522-
batch_write(opt, scratch->buf, scratch->len);
523-
524-
if (opt->batch_mode == BATCH_MODE_CONTENTS) {
525-
print_object_or_die(opt, data);
526-
batch_write(opt, &opt->output_delim, 1);
527-
}
550+
if (opt->batch_mode == BATCH_MODE_CONTENTS)
551+
print_object_or_die(opt, data, scratch);
552+
else
553+
batch_write(opt, scratch->buf, scratch->len);
528554
}
529555

530556
static void batch_one_object(const char *obj_name,
@@ -666,7 +692,7 @@ static void parse_cmd_contents(struct batch_options *opt,
666692
struct expand_data *data)
667693
{
668694
opt->batch_mode = BATCH_MODE_CONTENTS;
669-
data->info.contentp = &data->content;
695+
data->info.contentp = &data->iov[1].iov_base;
670696
batch_one_object(line, output, opt, data);
671697
}
672698

@@ -823,7 +849,7 @@ static int batch_objects(struct batch_options *opt)
823849
data.info.typep = &data.type;
824850
if (!opt->transform_mode) {
825851
data.info.sizep = &data.size;
826-
data.info.contentp = &data.content;
852+
data.info.contentp = &data.iov[1].iov_base;
827853
data.info.content_limit = big_file_threshold;
828854
data.info.direct_cache = 1;
829855
}

config.mak.uname

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ ifeq ($(uname_S),Linux)
6969
BASIC_CFLAGS += -std=c99
7070
endif
7171
LINK_FUZZ_PROGRAMS = YesPlease
72+
HAVE_WRITEV = YesPlease
7273
endif
7374
ifeq ($(uname_S),GNU/kFreeBSD)
7475
HAVE_ALLOCA_H = YesPlease
@@ -77,6 +78,7 @@ ifeq ($(uname_S),GNU/kFreeBSD)
7778
DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease
7879
LIBC_CONTAINS_LIBINTL = YesPlease
7980
FREAD_READS_DIRECTORIES = UnfortunatelyYes
81+
HAVE_WRITEV = YesPlease
8082
endif
8183
ifeq ($(uname_S),UnixWare)
8284
CC = cc
@@ -292,6 +294,7 @@ ifeq ($(uname_S),FreeBSD)
292294
PAGER_ENV = LESS=FRX LV=-c MORE=FRX
293295
FREAD_READS_DIRECTORIES = UnfortunatelyYes
294296
FILENO_IS_A_MACRO = UnfortunatelyYes
297+
HAVE_WRITEV = YesPlease
295298
endif
296299
ifeq ($(uname_S),OpenBSD)
297300
NO_STRCASESTR = YesPlease
@@ -307,6 +310,7 @@ ifeq ($(uname_S),OpenBSD)
307310
PROCFS_EXECUTABLE_PATH = /proc/curproc/file
308311
FREAD_READS_DIRECTORIES = UnfortunatelyYes
309312
FILENO_IS_A_MACRO = UnfortunatelyYes
313+
HAVE_WRITEV = YesPlease
310314
endif
311315
ifeq ($(uname_S),MirBSD)
312316
NO_STRCASESTR = YesPlease
@@ -329,6 +333,7 @@ ifeq ($(uname_S),NetBSD)
329333
HAVE_BSD_KERN_PROC_SYSCTL = YesPlease
330334
CSPRNG_METHOD = arc4random
331335
PROCFS_EXECUTABLE_PATH = /proc/curproc/exe
336+
HAVE_WRITEV = YesPlease
332337
endif
333338
ifeq ($(uname_S),AIX)
334339
DEFAULT_PAGER = more

git-compat-util.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,16 @@ static inline int git_setitimer(int which UNUSED,
388388
#define setitimer(which,value,ovalue) git_setitimer(which,value,ovalue)
389389
#endif
390390

391+
#ifdef HAVE_WRITEV
392+
#include <sys/uio.h>
393+
#define git_iovec iovec
394+
#else /* !HAVE_WRITEV */
395+
struct git_iovec {
396+
void *iov_base;
397+
size_t iov_len;
398+
};
399+
#endif /* !HAVE_WRITEV */
400+
391401
#ifndef NO_LIBGEN_H
392402
#include <libgen.h>
393403
#else

wrapper.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,24 @@ ssize_t xwrite(int fd, const void *buf, size_t len)
262262
}
263263
}
264264

265+
#ifdef HAVE_WRITEV
266+
ssize_t xwritev(int fd, const struct iovec *iov, int iovcnt)
267+
{
268+
while (1) {
269+
ssize_t nr = writev(fd, iov, iovcnt);
270+
271+
if (nr < 0) {
272+
if (errno == EINTR)
273+
continue;
274+
if (handle_nonblock(fd, POLLOUT, errno))
275+
continue;
276+
}
277+
278+
return nr;
279+
}
280+
}
281+
#endif /* !HAVE_WRITEV */
282+
265283
/*
266284
* xpread() is the same as pread(), but it automatically restarts pread()
267285
* operations with a recoverable error (EAGAIN and EINTR). xpread() DOES

wrapper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ void *xmmap_gently(void *start, size_t length, int prot, int flags, int fd, off_
1616
int xopen(const char *path, int flags, ...);
1717
ssize_t xread(int fd, void *buf, size_t len);
1818
ssize_t xwrite(int fd, const void *buf, size_t len);
19+
ssize_t xwritev(int fd, const struct git_iovec *, int iovcnt);
1920
ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
2021
int xdup(int fd);
2122
FILE *xfopen(const char *path, const char *mode);

write-or-die.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,69 @@ void fflush_or_die(FILE *f)
107107
if (fflush(f))
108108
die_errno("fflush error");
109109
}
110+
111+
void fwritev_or_die(FILE *fp, const struct git_iovec *iov, int iovcnt)
112+
{
113+
int i;
114+
115+
for (i = 0; i < iovcnt; i++) {
116+
size_t n = iov[i].iov_len;
117+
118+
if (fwrite(iov[i].iov_base, 1, n, fp) != n)
119+
die_errno("unable to write to FD=%d", fileno(fp));
120+
}
121+
}
122+
123+
/*
124+
* note: we don't care about atomicity from writev(2) right now.
125+
* The goal is to avoid allocations+copies in the writer and
126+
* reduce wakeups+syscalls in the reader.
127+
* n.b. @iov is not const since we modify it to avoid allocating
128+
* on partial write.
129+
*/
130+
#ifdef HAVE_WRITEV
131+
void writev_or_die(int fd, struct git_iovec *iov, int iovcnt)
132+
{
133+
int i;
134+
135+
while (iovcnt > 0) {
136+
ssize_t n = xwritev(fd, iov, iovcnt);
137+
138+
/* EINVAL happens when sum of iov_len exceeds SSIZE_MAX */
139+
if (n < 0 && errno == EINVAL)
140+
n = xwrite(fd, iov[0].iov_base, iov[0].iov_len);
141+
if (n < 0) {
142+
check_pipe(errno);
143+
die_errno("writev error");
144+
} else if (!n) {
145+
errno = ENOSPC;
146+
die_errno("writev_error");
147+
}
148+
/* skip fully written iovs, retry from the first partial iov */
149+
for (i = 0; i < iovcnt; i++) {
150+
if (n >= iov[i].iov_len) {
151+
n -= iov[i].iov_len;
152+
} else {
153+
iov[i].iov_len -= n;
154+
iov[i].iov_base = (char *)iov[i].iov_base + n;
155+
break;
156+
}
157+
}
158+
iovcnt -= i;
159+
iov += i;
160+
}
161+
}
162+
#else /* !HAVE_WRITEV */
163+
164+
/*
165+
* n.b. don't use stdio fwrite here even if it's faster, @fd may be
166+
* non-blocking and stdio isn't equipped for EAGAIN
167+
*/
168+
void writev_or_die(int fd, struct git_iovec *iov, int iovcnt)
169+
{
170+
int i;
171+
172+
for (i = 0; i < iovcnt; i++)
173+
write_or_die(fd, iov[i].iov_base, iov[i].iov_len);
174+
}
175+
#endif /* !HAVE_WRITEV */

write-or-die.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ void fprintf_or_die(FILE *, const char *fmt, ...);
77
void fwrite_or_die(FILE *f, const void *buf, size_t count);
88
void fflush_or_die(FILE *f);
99
void write_or_die(int fd, const void *buf, size_t count);
10+
void writev_or_die(int fd, struct git_iovec *, int iovcnt);
11+
void fwritev_or_die(FILE *, const struct git_iovec *, int iovcnt);
1012

1113
/*
1214
* These values are used to help identify parts of a repository to fsync.

0 commit comments

Comments
 (0)