diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index a6658a9c2800..04908f14795c 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -494,8 +494,7 @@ get_usage(zpool_help_t idx)
 		    "[--json-int, --json-pool-key-guid]] ...\n"
 		    "\t    [-T d|u] [pool] [interval [count]]\n"));
 	case HELP_PREFETCH:
-		return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n"
-		    "\t    -t ddt <pool>\n"));
+		return (gettext("\tprefetch [-t <type>] <pool>\n"));
 	case HELP_OFFLINE:
 		return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
 		    "<device> ...\n"));
@@ -4200,7 +4199,7 @@ zpool_do_checkpoint(int argc, char **argv)
 #define CHECKPOINT_OPT 1024
 
 /*
- * zpool prefetch <type> [<type opts>] <pool>
+ * zpool prefetch [-t <type>] <pool>
  *
  * Prefetches a particular type of data in the specified pool.
  */
@@ -4245,20 +4244,27 @@ zpool_do_prefetch(int argc, char **argv)
 
 	poolname = argv[0];
 
-	argc--;
-	argv++;
-
-	if (strcmp(typestr, "ddt") == 0) {
-		type = ZPOOL_PREFETCH_DDT;
-	} else {
-		(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
-		usage(B_FALSE);
-	}
-
 	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
-	err = zpool_prefetch(zhp, type);
+	if (typestr == NULL) {
+		/* Prefetch all types */
+		err = zpool_prefetch(zhp, ZPOOL_PREFETCH_DDT);
+		if (err == 0)
+			err = zpool_prefetch(zhp, ZPOOL_PREFETCH_BRT);
+	} else {
+		if (strcmp(typestr, "ddt") == 0) {
+			type = ZPOOL_PREFETCH_DDT;
+		} else if (strcmp(typestr, "brt") == 0) {
+			type = ZPOOL_PREFETCH_BRT;
+		} else {
+			(void) fprintf(stderr,
+			    gettext("unsupported prefetch type\n"));
+			zpool_close(zhp);
+			usage(B_FALSE);
+		}
+		err = zpool_prefetch(zhp, type);
+	}
 
 	zpool_close(zhp);
diff --git a/include/sys/brt.h b/include/sys/brt.h
index d7c1814b084f..2a23a6a7f75d 100644
--- a/include/sys/brt.h
+++ b/include/sys/brt.h
@@ -56,6 +56,7 @@ extern void brt_create(spa_t *spa);
 extern int brt_load(spa_t *spa);
 extern void brt_unload(spa_t *spa);
 extern void brt_sync(spa_t *spa, uint64_t txg);
+extern void brt_prefetch_all(spa_t *spa);
 
 #ifdef __cplusplus
 }
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 662fd81c5ee1..aa7421261786 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1713,7 +1713,8 @@ typedef enum {
 
 typedef enum {
 	ZPOOL_PREFETCH_NONE = 0,
-	ZPOOL_PREFETCH_DDT
+	ZPOOL_PREFETCH_DDT,
+	ZPOOL_PREFETCH_BRT
 } zpool_prefetch_type_t;
 
 typedef enum {
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index ce154ae1a4cd..756d701e2d97 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -1745,9 +1745,13 @@ zpool_prefetch(zpool_handle_t *zhp, zpool_prefetch_type_t type)
 
 	error = lzc_pool_prefetch(zhp->zpool_name, type);
 	if (error != 0) {
+		const char *typename = "unknown";
+		if (type == ZPOOL_PREFETCH_DDT)
+			typename = "ddt";
+		else if (type == ZPOOL_PREFETCH_BRT)
+			typename = "brt";
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
-		    "cannot prefetch %s in '%s'"),
-		    type == ZPOOL_PREFETCH_DDT ? "ddt" : "", zhp->zpool_name);
+		    "cannot prefetch %s in '%s'"), typename, zhp->zpool_name);
 		(void) zpool_standard_error(hdl, error, msg);
 		return (-1);
 	}
diff --git a/man/man8/zpool-prefetch.8 b/man/man8/zpool-prefetch.8
index a36ad52e681e..6f4c3b129040 100644
--- a/man/man8/zpool-prefetch.8
+++ b/man/man8/zpool-prefetch.8
@@ -28,20 +28,25 @@
 .
 .Sh NAME
 .Nm zpool-prefetch
-.Nd Loads specific types of data for the given pool
+.Nd Prefetches pool metadata into the ARC
 .Sh SYNOPSIS
 .Nm zpool
 .Cm prefetch
-.Fl t Ar type
+.Op Fl t Ar type
 .Ar pool
 .Sh DESCRIPTION
-.Bl -tag -width Ds
-.It Xo
-.Nm zpool
-.Cm prefetch
-.Fl t Li ddt
-.Ar pool
-.Xc
-Prefetch data of a specific type for the given pool; specifically the DDT,
-which will improve write I/O performance when the DDT is resident in the ARC.
+Prefetch all metadata of a specific type for the given pool into the ARC
+to reduce the latency of operations that use it later.
+If no type is specified, all supported types are prefetched.
+.Pp
+The following types are supported:
+.Bl -tag -width "brt"
+.It Sy brt
+Prefetch the BRT (block reference table).
+This may improve the performance of block cloning operations,
+and of frees of previously cloned blocks.
+.It Sy ddt
+Prefetch the DDT (deduplication table).
+This may improve the performance of writes when deduplication is enabled,
+and of frees of previously deduplicated blocks.
 .El
diff --git a/module/zfs/brt.c b/module/zfs/brt.c
index 9e389c8727b9..156c2e198fe4 100644
--- a/module/zfs/brt.c
+++ b/module/zfs/brt.c
@@ -1510,6 +1510,31 @@ brt_load(spa_t *spa)
 	return (error);
 }
 
+void
+brt_prefetch_all(spa_t *spa)
+{
+	/*
+	 * Load all BRT entries for each vdev. This is intended to perform
+	 * a prefetch on all such blocks. For the same reason that brt_prefetch
+	 * (called from brt_pending_add) isn't locked, this is also not locked.
+	 */
+	brt_rlock(spa);
+	for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
+		brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
+		brt_unlock(spa);
+
+		rw_enter(&brtvd->bv_mos_entries_lock, RW_READER);
+		if (brtvd->bv_mos_entries != 0) {
+			(void) zap_prefetch_object(spa->spa_meta_objset,
+			    brtvd->bv_mos_entries);
+		}
+		rw_exit(&brtvd->bv_mos_entries_lock);
+
+		brt_rlock(spa);
+	}
+	brt_unlock(spa);
+}
+
 void
 brt_unload(spa_t *spa)
 {
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 249f878a1e22..5690f8afad00 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -850,12 +850,15 @@ dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size)
 		return (err);
 
 	/*
-	 * Chunk the requests (16 indirects worth) so that we can be interrupted
+	 * Chunk the requests (16 indirects worth) so that we can be
+	 * interrupted. Prefetch at least SPA_MAXBLOCKSIZE at a time
+	 * to better utilize pools with smaller block sizes.
 	 */
 	uint64_t chunksize;
 	if (dn->dn_indblkshift) {
 		uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
 		chunksize = (nbps * 16) << dn->dn_datablkshift;
+		chunksize = MAX(chunksize, SPA_MAXBLOCKSIZE);
 	} else {
 		chunksize = dn->dn_datablksz;
 	}
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 5ca7c2320c4e..cb85bb12e9cd 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -212,6 +212,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
@@ -4276,13 +4278,11 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 	spa_t *spa;
 	int32_t type;
 
-	/*
-	 * Currently, only ZPOOL_PREFETCH_DDT is supported
-	 */
-	if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 ||
-	    type != ZPOOL_PREFETCH_DDT) {
+	if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
+		return (EINVAL);
+
+	if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
 		return (EINVAL);
-	}
 
 	error = spa_open(poolname, &spa, FTAG);
 	if (error != 0)
@@ -4290,10 +4290,17 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 
 	hrtime_t start_time = gethrtime();
 
-	ddt_prefetch_all(spa);
-
-	zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
-	    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+	if (type == ZPOOL_PREFETCH_DDT) {
+		ddt_prefetch_all(spa);
+		zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
+		    spa->spa_name,
+		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+	} else {
+		brt_prefetch_all(spa);
+		zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
+		    spa->spa_name,
+		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+	}
 
 	spa_close(spa, FTAG);
 
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 9f531411fbe1..a69c6e3c8dd7 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -215,7 +215,7 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
 tags = ['functional', 'cli_root', 'zfs_create']
 
 [tests/functional/cli_root/zpool_prefetch]
-tests = ['zpool_prefetch_001_pos']
+tests = ['zpool_prefetch_001_pos', 'zpool_prefetch_002_pos']
 tags = ['functional', 'cli_root', 'zpool_prefetch']
 
 [tests/functional/cli_root/zfs_destroy]
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 678c01b58f94..23284234cdf7 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1217,6 +1217,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/cli_root/zpool_prefetch/cleanup.ksh \
 	functional/cli_root/zpool_prefetch/setup.ksh \
 	functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh \
+	functional/cli_root/zpool_prefetch/zpool_prefetch_002_pos.ksh \
 	functional/cli_root/zpool_reguid/cleanup.ksh \
 	functional/cli_root/zpool_reguid/setup.ksh \
 	functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh
index 8ef3a66ad0d9..fd446e46e96c 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh
@@ -42,6 +42,15 @@ verify_runnable "both"
 
 log_assert "'zpool prefetch -t ddt <pool>' can successfully load the DDT for a pool."
 
+DATASET=$TESTPOOL/ddt
+
+function cleanup
+{
+	datasetexists $DATASET && destroy_dataset $DATASET -f
+}
+
+log_onexit cleanup
+
 function getddtstats
 {
 	typeset -n gds=$1
@@ -75,9 +84,8 @@ log_must zpool prefetch -t ddt $TESTPOOL
 
 # Build up the deduplicated dataset. This consists of creating enough files
 # to generate a reasonable size DDT for testing purposes.
-DATASET=$TESTPOOL/ddt
 log_must zfs create -o compression=off -o dedup=on $DATASET
-MNTPOINT=$(get_prop mountpoint $TESTPOOL/ddt)
+MNTPOINT=$(get_prop mountpoint $DATASET)
 
 log_note "Generating dataset ..."
 typeset -i i=0
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_002_pos.ksh
new file mode 100755
index 000000000000..f34f8c36e592
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_002_pos.ksh
@@ -0,0 +1,95 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 by iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	'zpool prefetch -t brt <pool>' can successfully load a pool's BRT on demand.
+#	'zpool prefetch <pool>' without -t prefetches both DDT and BRT.
+#
+# STRATEGY:
+#	1. Create a dataset with block cloning enabled.
+#	2. Create files and clone them to populate the BRT.
+#	3. Export and import the pool to flush caches.
+#	4. Use zpool prefetch -t brt to load BRT.
+#	5. Test zpool prefetch without -t to prefetch all types.
+#
+
+verify_runnable "both"
+
+if ! command -v clonefile > /dev/null ; then
+	log_unsupported "clonefile program required to test block cloning"
+fi
+
+log_assert "'zpool prefetch' can successfully load BRT and prefetch all types"
+
+DATASET=$TESTPOOL/brt
+
+function cleanup
+{
+	datasetexists $DATASET && destroy_dataset $DATASET -f
+}
+
+log_onexit cleanup
+log_must zfs create $DATASET
+MNTPOINT=$(get_prop mountpoint $DATASET)
+
+log_note "Generating cloned blocks for BRT ..."
+
+# Create source file
+log_must dd if=/dev/urandom of=$MNTPOINT/source bs=1M count=100
+
+# Create clones using clonefile
+typeset -i i=0
+while (( i < 50 )); do
+	log_must clonefile -f $MNTPOINT/source $MNTPOINT/clone.$i
+	((i += 1))
+done
+
+sync_pool $TESTPOOL
+
+# Verify BRT has entries (non-zero saved space)
+brt_saved=$(zpool get -Hp -o value bclonesaved $TESTPOOL)
+log_note "BRT saved space: $brt_saved"
+log_must test "$brt_saved" -gt "0"
+
+# Export/import to flush caches
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+
+# Test BRT prefetch - verify command succeeds
+# Note: BRT does not expose cache statistics like DDT, so we can only
+# verify the prefetch command completes successfully
+log_must zpool prefetch -t brt $TESTPOOL
+
+# Test prefetch without -t (should prefetch all types including BRT)
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+log_must zpool prefetch $TESTPOOL
+
+log_pass "'zpool prefetch' successfully loads BRT and all types"
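Reviewer note, not part of the patch: for anyone who wants to exercise the new prefetch path outside the zpool CLI, the sketch below is a minimal libzfs consumer that mirrors what the updated zpool_do_prefetch() does when no -t option is given (DDT first, then BRT). It is illustrative only; the main() wrapper, error handling, and whatever compile/link flags your platform needs for libzfs are assumptions, not something this patch defines.

#include <stdio.h>
#include <libzfs.h>

int
main(int argc, char **argv)
{
	if (argc != 2) {
		(void) fprintf(stderr, "usage: %s <pool>\n", argv[0]);
		return (1);
	}

	libzfs_handle_t *hdl = libzfs_init();
	if (hdl == NULL)
		return (1);

	zpool_handle_t *zhp = zpool_open(hdl, argv[1]);
	if (zhp == NULL) {
		libzfs_fini(hdl);
		return (1);
	}

	/* Prefetch the DDT first, then the BRT, the same order the CLI uses. */
	int err = zpool_prefetch(zhp, ZPOOL_PREFETCH_DDT);
	if (err == 0)
		err = zpool_prefetch(zhp, ZPOOL_PREFETCH_BRT);

	zpool_close(zhp);
	libzfs_fini(hdl);
	return (err == 0 ? 0 : 1);
}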