Skip to content

Commit 79614b2

Browse files
djwongtytso
authored and committed
libext2fs/e2fsck: provide routines to read-ahead metadata
This patch adds to e2fsck the ability to pre-fetch metadata into the page cache in the hopes of speeding up fsck runs. There are two new functions -- the first allows a caller to readahead a list of blocks, and the second is a helper function that uses that first mechanism to load group data (bitmaps, inode tables). These new e2fsck routines require the addition of a dblist API to allow us to iterate a subset of a dblist. This will enable incremental directory block readahead in e2fsck pass 2. There's also a function to estimate the readahead given a FS. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 76761ca commit 79614b2

File tree

9 files changed

+359
-9
lines changed

9 files changed

+359
-9
lines changed

configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12390,7 +12390,7 @@ fi
1239012390
done
1239112391

1239212392
fi
12393-
for ac_header in dirent.h errno.h execinfo.h getopt.h malloc.h mntent.h paths.h semaphore.h setjmp.h signal.h stdarg.h stdint.h stdlib.h termios.h termio.h unistd.h utime.h attr/xattr.h linux/falloc.h linux/fd.h linux/major.h linux/loop.h net/if_dl.h netinet/in.h sys/disklabel.h sys/disk.h sys/file.h sys/ioctl.h sys/mkdev.h sys/mman.h sys/mount.h sys/prctl.h sys/resource.h sys/select.h sys/socket.h sys/sockio.h sys/stat.h sys/syscall.h sys/sysmacros.h sys/time.h sys/types.h sys/un.h sys/wait.h
12393+
for ac_header in dirent.h errno.h execinfo.h getopt.h malloc.h mntent.h paths.h semaphore.h setjmp.h signal.h stdarg.h stdint.h stdlib.h termios.h termio.h unistd.h utime.h attr/xattr.h linux/falloc.h linux/fd.h linux/major.h linux/loop.h net/if_dl.h netinet/in.h sys/disklabel.h sys/disk.h sys/file.h sys/ioctl.h sys/mkdev.h sys/mman.h sys/mount.h sys/prctl.h sys/resource.h sys/select.h sys/socket.h sys/sockio.h sys/stat.h sys/syscall.h sys/sysctl.h sys/sysmacros.h sys/time.h sys/types.h sys/un.h sys/wait.h
1239412394
do :
1239512395
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
1239612396
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"

configure.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,7 @@ AC_CHECK_HEADERS(m4_flatten([
932932
sys/sockio.h
933933
sys/stat.h
934934
sys/syscall.h
935+
sys/sysctl.h
935936
sys/sysmacros.h
936937
sys/time.h
937938
sys/types.h

e2fsck/Makefile.in

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ OBJS= dict.o unix.o e2fsck.o super.o pass1.o pass1b.o pass2.o \
6262
pass3.o pass4.o pass5.o journal.o badblocks.o util.o dirinfo.o \
6363
dx_dirinfo.o ehandler.o problem.o message.o quota.o recovery.o \
6464
region.o revoke.o ea_refcount.o rehash.o profile.o prof_err.o \
65-
logfile.o sigcatcher.o $(MTRACE_OBJ) plausible.o
65+
logfile.o sigcatcher.o $(MTRACE_OBJ) plausible.o readahead.o
6666

6767
PROFILED_OBJS= profiled/dict.o profiled/unix.o profiled/e2fsck.o \
6868
profiled/super.o profiled/pass1.o profiled/pass1b.o \
@@ -72,8 +72,8 @@ PROFILED_OBJS= profiled/dict.o profiled/unix.o profiled/e2fsck.o \
7272
profiled/message.o profiled/problem.o profiled/quota.o \
7373
profiled/recovery.o profiled/region.o profiled/revoke.o \
7474
profiled/ea_refcount.o profiled/rehash.o profiled/profile.o \
75-
profiled/prof_err.o profiled/logfile.o \
76-
profiled/sigcatcher.o profiled/plausible.o
75+
profiled/prof_err.o profiled/logfile.o profiled/sigcatcher.o \
76+
profiled/plausible.o profiled/readahead.o
7777

7878
SRCS= $(srcdir)/e2fsck.c \
7979
$(srcdir)/dict.c \
@@ -97,6 +97,7 @@ SRCS= $(srcdir)/e2fsck.c \
9797
$(srcdir)/message.c \
9898
$(srcdir)/ea_refcount.c \
9999
$(srcdir)/rehash.c \
100+
$(srcdir)/readahead.c \
100101
$(srcdir)/region.c \
101102
$(srcdir)/profile.c \
102103
$(srcdir)/sigcatcher.c \
@@ -541,3 +542,6 @@ plausible.o: $(srcdir)/../misc/plausible.c $(top_builddir)/lib/config.h \
541542
$(top_builddir)/lib/ext2fs/ext2_err.h \
542543
$(top_srcdir)/lib/ext2fs/ext2_ext_attr.h $(top_srcdir)/lib/ext2fs/bitops.h \
543544
$(srcdir)/../misc/nls-enable.h $(srcdir)/../misc/plausible.h
545+
readahead.o: $(srcdir)/readahead.c $(top_builddir)/lib/config.h \
546+
$(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/ext2fs/ext2_fs.h \
547+
$(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/e2fsck.h prof_err.h

e2fsck/e2fsck.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,23 @@ extern ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix);
495495
extern errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino,
496496
int adj);
497497

498+
/* readahead.c */
499+
#define E2FSCK_READA_SUPER (0x01)
500+
#define E2FSCK_READA_GDT (0x02)
501+
#define E2FSCK_READA_BBITMAP (0x04)
502+
#define E2FSCK_READA_IBITMAP (0x08)
503+
#define E2FSCK_READA_ITABLE (0x10)
504+
#define E2FSCK_READA_ALL_FLAGS (0x1F)
505+
errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
506+
dgrp_t ngroups);
507+
#define E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT (0x01)
508+
#define E2FSCK_RA_DBLIST_ALL_FLAGS (0x01)
509+
errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
510+
ext2_dblist dblist,
511+
unsigned long long start,
512+
unsigned long long count);
513+
int e2fsck_can_readahead(ext2_filsys fs);
514+
unsigned long long e2fsck_guess_readahead(ext2_filsys fs);
498515

499516
/* region.c */
500517
extern region_t region_create(region_addr_t min, region_addr_t max);
@@ -582,6 +599,7 @@ extern errcode_t e2fsck_allocate_subcluster_bitmap(ext2_filsys fs,
582599
int default_type,
583600
const char *profile_name,
584601
ext2fs_block_bitmap *ret);
602+
unsigned long long get_memory_size(void);
585603

586604
/* unix.c */
587605
extern void e2fsck_clear_progbar(e2fsck_t ctx);

e2fsck/readahead.c

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
/*
2+
* readahead.c -- Prefetch filesystem metadata to speed up fsck.
3+
*
4+
* Copyright (C) 2014 Oracle.
5+
*
6+
* %Begin-Header%
7+
* This file may be redistributed under the terms of the GNU Library
8+
* General Public License, version 2.
9+
* %End-Header%
10+
*/
11+
12+
#include "config.h"
13+
#include <string.h>
14+
15+
#include "e2fsck.h"
16+
17+
#undef DEBUG
18+
19+
#ifdef DEBUG
20+
# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
21+
#else
22+
# define dbg_printf(f, a...)
23+
#endif
24+
25+
struct read_dblist {
26+
errcode_t err;
27+
blk64_t run_start;
28+
blk64_t run_len;
29+
int flags;
30+
};
31+
32+
/*
 * dblist iterator callback: coalesce physically contiguous entries into
 * a single run and issue a readahead whenever the run is broken.
 *
 * priv_data points to a struct read_dblist.  The final run is NOT
 * flushed here; the caller must submit whatever is left in run_start /
 * run_len once iteration finishes.
 *
 * Returns DBLIST_ABORT once a readahead request has failed, 0 otherwise.
 */
static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
			       void *priv_data)
{
	struct read_dblist *state = priv_data;
	e2_blkcnt_t nblocks;
	int extends_run;

	/*
	 * Unless told to ignore it, the entry's blockcnt is used as the
	 * number of blocks this entry contributes to the run.
	 */
	if (state->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT)
		nblocks = 1;
	else
		nblocks = db->blockcnt;

	extends_run = state->run_len &&
		      db->blk == state->run_start + state->run_len;

	if (!extends_run) {
		/* Submit the run collected so far before starting a new one. */
		if (state->run_len) {
			state->err = io_channel_cache_readahead(fs->io,
							state->run_start,
							state->run_len);
			dbg_printf("readahead start=%llu len=%llu err=%d\n",
				   state->run_start, state->run_len,
				   (int)state->err);
		}
		state->run_start = db->blk;
		state->run_len = 0;
	}
	state->run_len += nblocks;

	if (state->err)
		return DBLIST_ABORT;
	return 0;
}
55+
56+
errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
57+
ext2_dblist dblist,
58+
unsigned long long start,
59+
unsigned long long count)
60+
{
61+
errcode_t err;
62+
struct read_dblist pr;
63+
64+
dbg_printf("%s: flags=0x%x\n", __func__, flags);
65+
if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
66+
return EXT2_ET_INVALID_ARGUMENT;
67+
68+
memset(&pr, 0, sizeof(pr));
69+
pr.flags = flags;
70+
err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
71+
count, &pr);
72+
if (pr.err)
73+
return pr.err;
74+
if (err)
75+
return err;
76+
77+
if (pr.run_len)
78+
err = io_channel_cache_readahead(fs->io, pr.run_start,
79+
pr.run_len);
80+
81+
return err;
82+
}
83+
84+
static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
85+
ext2fs_block_bitmap ra_map)
86+
{
87+
blk64_t start, end, out;
88+
errcode_t err;
89+
90+
start = 1;
91+
end = ext2fs_blocks_count(fs->super) - 1;
92+
93+
err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
94+
while (err == 0) {
95+
start = out;
96+
err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
97+
&out);
98+
if (err == ENOENT) {
99+
out = end;
100+
err = 0;
101+
} else if (err)
102+
break;
103+
104+
err = io_channel_cache_readahead(fs->io, start, out - start);
105+
if (err)
106+
break;
107+
start = out;
108+
err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
109+
&out);
110+
}
111+
112+
if (err == ENOENT)
113+
err = 0;
114+
115+
return err;
116+
}
117+
118+
/* Try not to spew bitmap range errors for readahead */
119+
static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
120+
blk64_t blk, unsigned int num)
121+
{
122+
if (blk >= ext2fs_get_generic_bmap_start(map) &&
123+
blk + num <= ext2fs_get_generic_bmap_end(map))
124+
ext2fs_mark_block_bitmap_range2(map, blk, num);
125+
else
126+
return EXT2_ET_INVALID_ARGUMENT;
127+
return 0;
128+
}
129+
130+
/*
 * Mark a single block in map after checking it against the bitmap's
 * bounds.
 *
 * Returns 0 on success, EXT2_ET_INVALID_ARGUMENT if blk is out of range.
 */
static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
{
	if (blk < ext2fs_get_generic_bmap_start(map) ||
	    blk > ext2fs_get_generic_bmap_end(map))
		return EXT2_ET_INVALID_ARGUMENT;

	ext2fs_mark_block_bitmap2(map, blk);
	return 0;
}
139+
140+
/*
 * Prefetch selected per-group metadata for block groups
 * [start, start + ngroups), clamped to the number of groups in fs.
 *
 * fs      - filesystem to read ahead on
 * flags   - any combination of E2FSCK_READA_SUPER, _GDT, _BBITMAP,
 *           _IBITMAP and _ITABLE; other bits are rejected
 * start   - first block group
 * ngroups - number of block groups
 *
 * The blocks to fetch are first collected in a scratch block bitmap and
 * then submitted as contiguous runs by e2fsck_readahead_bitmap().
 *
 * Returns 0 on success (or when there is nothing to do),
 * EXT2_ET_INVALID_ARGUMENT for unknown flags or for a location that
 * falls outside the bitmap, or any error from the library calls used.
 */
errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
			   dgrp_t ngroups)
{
	blk64_t super, old_gdt, new_gdt, loc;
	blk_t blocks;
	dgrp_t i, end;
	ext2fs_block_bitmap ra_map = NULL;
	errcode_t err = 0;

	dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
		   start, ngroups);
	if (flags & ~E2FSCK_READA_ALL_FLAGS)
		return EXT2_ET_INVALID_ARGUMENT;

	if (flags == 0)
		return 0;

	/*
	 * Clamp the group range without computing start + ngroups first:
	 * that sum can wrap around, which would make the range look empty
	 * when the caller asked for "everything from start onwards".
	 */
	if (start >= fs->group_desc_count)
		return 0;
	if (ngroups > fs->group_desc_count - start)
		end = fs->group_desc_count;
	else
		end = start + ngroups;

	err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
					   &ra_map);
	if (err)
		return err;

	for (i = start; i < end; i++) {
		err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt,
						&new_gdt, &blocks);
		if (err)
			break;

		if (flags & E2FSCK_READA_SUPER) {
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if (flags & E2FSCK_READA_GDT) {
			err = mark_bmap_range(ra_map,
					      old_gdt ? old_gdt : new_gdt,
					      blocks);
			if (err)
				break;
		}

		/* Skip bitmaps that are uninitialized or describe nothing in use. */
		if ((flags & E2FSCK_READA_BBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) &&
		    ext2fs_bg_free_blocks_count(fs, i) <
				fs->super->s_blocks_per_group) {
			loc = ext2fs_block_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, loc);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_IBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			loc = ext2fs_inode_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, loc);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_ITABLE) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			loc = ext2fs_inode_table_loc(fs, i);
			/*
			 * Only the leading part of the inode table that is
			 * not covered by the group's itable_unused count.
			 * NOTE(review): a bogus on-disk itable_unused larger
			 * than the table makes this subtraction wrap; the
			 * range check in mark_bmap_range() then fails the
			 * whole call -- confirm that is the desired outcome.
			 */
			blocks = fs->inode_blocks_per_group -
				 (ext2fs_bg_itable_unused(fs, i) *
				  EXT2_INODE_SIZE(fs->super) / fs->blocksize);
			err = mark_bmap_range(ra_map, loc, blocks);
			if (err)
				break;
		}
	}

	if (!err)
		err = e2fsck_readahead_bitmap(fs, ra_map);

	ext2fs_free_block_bitmap(ra_map);
	return err;
}
225+
226+
/*
 * Probe whether the filesystem's I/O channel supports readahead by
 * requesting a one-block readahead at block 0.
 *
 * Returns 0 only when the probe reports EXT2_ET_OP_NOT_SUPPORTED;
 * any other outcome (including other errors) yields nonzero.
 */
int e2fsck_can_readahead(ext2_filsys fs)
{
	errcode_t probe = io_channel_cache_readahead(fs->io, 0, 1);
	int supported = (probe != EXT2_ET_OP_NOT_SUPPORTED);

	dbg_printf("%s: supp=%d\n", __func__, supported);
	return supported;
}
234+
235+
/*
 * Estimate a readahead size for fs.
 *
 * Returns the suggested size in KiB (the byte estimate divided by 1024),
 * or 0 to indicate that readahead should be disabled because the
 * estimate is too large relative to the memory size reported by
 * get_memory_size().
 */
unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
{
	unsigned long long guess;

	/*
	 * The optimal readahead sizes were experimentally determined by
	 * djwong in August 2014.  Setting the RA size to two block groups'
	 * worth of inode table blocks seems to yield the largest reductions
	 * in e2fsck runtime.
	 *
	 * Force the arithmetic to 64 bits (2ULL) so the product is not
	 * truncated if the operands are narrower than the result.
	 */
	guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;

	/* Disable RA if it'd use more than 1/50th of RAM. */
	if (get_memory_size() > (guess * 50))
		return guess / 1024;

	return 0;
}

e2fsck/util.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
#include <errno.h>
3838
#endif
3939

40+
#ifdef HAVE_SYS_SYSCTL_H
41+
#include <sys/sysctl.h>
42+
#endif
43+
4044
#include "e2fsck.h"
4145

4246
extern e2fsck_t e2fsck_global_ctx; /* Try your very best not to use this! */
@@ -819,3 +823,50 @@ errcode_t e2fsck_allocate_subcluster_bitmap(ext2_filsys fs, const char *descr,
819823
fs->default_bitmap_type = save_type;
820824
return retval;
821825
}
826+
827+
/*
 * Return the memory size in bytes, or 0 if it cannot be determined.
 *
 * Uses sysconf() when the platform provides _SC_PHYS_PAGES together
 * with a page-size name, and falls back to the CTL_HW sysctl() MIBs
 * otherwise.
 */
unsigned long long get_memory_size(void)
{
#if defined(_SC_PHYS_PAGES) && \
	(defined(_SC_PAGESIZE) || defined(_SC_PAGE_SIZE))
	long pages = sysconf(_SC_PHYS_PAGES);
# if defined(_SC_PAGESIZE)
	long page_size = sysconf(_SC_PAGESIZE);
# else
	long page_size = sysconf(_SC_PAGE_SIZE);
# endif

	/*
	 * sysconf() returns -1 on failure; casting that to unsigned would
	 * report an absurdly large memory size, so treat it as "unknown".
	 */
	if (pages <= 0 || page_size <= 0)
		return 0;
	return (unsigned long long)pages * (unsigned long long)page_size;
#elif defined(CTL_HW)
# if (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
#  define CTL_HW_INT64
# elif (defined(HW_PHYSMEM) || defined(HW_REALMEM))
#  define CTL_HW_UINT
# endif
# if defined(CTL_HW_INT64) || defined(CTL_HW_UINT)
	int mib[2];
#  if defined(CTL_HW_INT64)
	unsigned long long size = 0;
#  else
	unsigned int size = 0;
#  endif
	size_t len = sizeof(size);

	mib[0] = CTL_HW;
#  if defined(HW_MEMSIZE)
	mib[1] = HW_MEMSIZE;
#  elif defined(HW_PHYSMEM64)
	mib[1] = HW_PHYSMEM64;
#  elif defined(HW_REALMEM)
	mib[1] = HW_REALMEM;
#  elif defined(HW_PHYSMEM)
	mib[1] = HW_PHYSMEM;
#  endif

	if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
		return (unsigned long long)size;
# endif
	return 0;
#else
# warning "Don't know how to detect memory on your platform?"
	return 0;
#endif
}

0 commit comments

Comments
 (0)