Skip to content

Commit a2abaca

Browse files
committed
ZIO: Set minimum number of free issue threads to 32
Free issue threads might block waiting for synchronous DDT, BRT, or GANG header reads. So unlike other taskqs that use ZTI_SCALE to scale with the number of CPUs, here we also need a certain number of threads to potentially saturate pool reads. I am not sure we always want the 96 threads we had before the ZTI_SCALE introduction at #11966 on small systems, but let's make it at least 32. While here, make the free taskqs configurable, similar to the read and write ones. Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
1 parent 8c225ff commit a2abaca

File tree

3 files changed

+127
-18
lines changed

3 files changed

+127
-18
lines changed

include/os/freebsd/spl/sys/mod.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@
104104
#define spa_taskq_write_param_set_args(var) \
105105
CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
106106

107+
#define spa_taskq_free_param_set_args(var) \
108+
CTLTYPE_STRING, NULL, 0, spa_taskq_free_param, "A"
109+
107110
#define fletcher_4_param_set_args(var) \
108111
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
109112

man/man4/zfs.4

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2660,12 +2660,50 @@ Set value only applies to pools imported/created after that.
26602660
Set the queue and thread configuration for the IO read queues.
26612661
This is an advanced debugging parameter.
26622662
Don't change this unless you understand what it does.
2663+
Each of the four values corresponds to the issue, issue high-priority,
2664+
interrupt, and interrupt high-priority queues.
2665+
Valid values are
2666+
.Sy fixed,N,M
2667+
(M queues with N threads each),
2668+
.Sy scale[,MIN]
2669+
(scale with CPUs, minimum MIN total threads),
2670+
.Sy sync ,
2671+
and
2672+
.Sy null .
26632673
Set values only apply to pools imported/created after that.
26642674
.
26652675
.It Sy zio_taskq_write Ns = Ns Sy sync null scale null Pq charp
26662676
Set the queue and thread configuration for the IO write queues.
26672677
This is an advanced debugging parameter.
26682678
Don't change this unless you understand what it does.
2679+
Each of the four values corresponds to the issue, issue high-priority,
2680+
interrupt, and interrupt high-priority queues.
2681+
Valid values are
2682+
.Sy fixed,N,M
2683+
(M queues with N threads each),
2684+
.Sy scale[,MIN]
2685+
(scale with CPUs, minimum MIN total threads),
2686+
.Sy sync ,
2687+
and
2688+
.Sy null .
2689+
Set values only apply to pools imported/created after that.
2690+
.
2691+
.It Sy zio_taskq_free Ns = Ns Sy scale,32 null null null Pq charp
2692+
Set the queue and thread configuration for the IO free queues.
2693+
This is an advanced debugging parameter.
2694+
Don't change this unless you understand what it does.
2695+
Each of the four values corresponds to the issue, issue high-priority,
2696+
interrupt, and interrupt high-priority queues.
2697+
Valid values are
2698+
.Sy fixed,N,M
2699+
(M queues with N threads each),
2700+
.Sy scale[,MIN]
2701+
(scale with CPUs, minimum MIN total threads),
2702+
.Sy sync ,
2703+
and
2704+
.Sy null .
2705+
The default uses a minimum of 32 threads to improve parallelism for
2706+
DDT and BRT metadata operations during frees.
26692707
Set values only apply to pools imported/created after that.
26702708
.
26712709
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint

module/zfs/spa.c

Lines changed: 86 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ typedef enum zti_modes {
141141

142142
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
143143
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
144-
#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
144+
#define ZTI_SCALE(min) { ZTI_MODE_SCALE, (min), 1 }
145145
#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
146146
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
147147

@@ -180,13 +180,13 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
180180
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
181181
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
182182
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
183-
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
183+
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* READ */
184184
#ifdef illumos
185-
{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
185+
{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE(0), ZTI_N(5) }, /* WRITE */
186186
#else
187-
{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */
187+
{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* WRITE */
188188
#endif
189-
{ ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
189+
{ ZTI_SCALE(32), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
190190
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
191191
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */
192192
{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */
@@ -1170,7 +1170,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
11701170
uint_t value = ztip->zti_value;
11711171
uint_t count = ztip->zti_count;
11721172
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
1173-
uint_t cpus, flags = TASKQ_DYNAMIC;
1173+
uint_t cpus, threads, flags = TASKQ_DYNAMIC;
11741174

11751175
switch (mode) {
11761176
case ZTI_MODE_FIXED:
@@ -1183,8 +1183,8 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
11831183
* Create one wr_iss taskq for every 'zio_taskq_write_tpq' CPUs,
11841184
* not to exceed the number of spa allocators, and align to it.
11851185
*/
1186-
cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
1187-
count = MAX(1, cpus / MAX(1, zio_taskq_write_tpq));
1186+
threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
1187+
count = MAX(1, threads / MAX(1, zio_taskq_write_tpq));
11881188
count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
11891189
count = MIN(count, spa->spa_alloc_count);
11901190
while (spa->spa_alloc_count % count != 0 &&
@@ -1201,14 +1201,14 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
12011201
break;
12021202

12031203
case ZTI_MODE_SCALE:
1204-
flags |= TASKQ_THREADS_CPU_PCT;
12051204
/*
12061205
* We want more taskqs to reduce lock contention, but we want
12071206
* less for better request ordering and CPU utilization.
12081207
*/
1209-
cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
1208+
threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
1209+
threads = MAX(threads, value);
12101210
if (zio_taskq_batch_tpq > 0) {
1211-
count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) /
1211+
count = MAX(1, (threads + zio_taskq_batch_tpq / 2) /
12121212
zio_taskq_batch_tpq);
12131213
} else {
12141214
/*
@@ -1228,13 +1228,23 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
12281228
* 128 10 8% 10 100
12291229
* 256 14 6% 15 210
12301230
*/
1231-
count = 1 + cpus / 6;
1231+
cpus = MIN(threads, boot_ncpus);
1232+
count = 1 + threads / 6;
12321233
while (count * count > cpus)
12331234
count--;
12341235
}
1235-
/* Limit each taskq within 100% to not trigger assertion. */
1236-
count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
1237-
value = (zio_taskq_batch_pct + count / 2) / count;
1236+
1237+
/*
1238+
* Try to represent the number of threads per taskq as percent
1239+
* of online CPUs to allow scaling with later online/offline.
1240+
* Fall back to absolute numbers if can't.
1241+
*/
1242+
value = (threads * 100 + boot_ncpus * count / 2) /
1243+
(boot_ncpus * count);
1244+
if (value < 5 || value > 100)
1245+
value = MAX(1, (threads + count / 2) / count);
1246+
else
1247+
flags |= TASKQ_THREADS_CPU_PCT;
12381248
break;
12391249

12401250
case ZTI_MODE_NULL:
@@ -1433,8 +1443,30 @@ spa_taskq_param_set(zio_type_t t, char *cfg)
14331443
break;
14341444
}
14351445

1446+
/*
1447+
* SCALE is optionally parameterised by minimum number of
1448+
* threads.
1449+
*/
14361450
case ZTI_MODE_SCALE: {
1437-
const zio_taskq_info_t zti = ZTI_SCALE;
1451+
unsigned long long mint = 0;
1452+
if (c != NULL && *c != '\0') {
1453+
/* Need a number */
1454+
if (!(isdigit(*c)))
1455+
break;
1456+
tok = c;
1457+
1458+
/* Take digits */
1459+
err = ddi_strtoull(tok, &tok, 10, &mint);
1460+
/* Must succeed, and moved forward */
1461+
if (err != 0 || tok == c || *tok != '\0')
1462+
break;
1463+
1464+
/* Sanity check */
1465+
if (mint >= 16384)
1466+
break;
1467+
}
1468+
1469+
const zio_taskq_info_t zti = ZTI_SCALE(mint);
14381470
row[q] = zti;
14391471
break;
14401472
}
@@ -1501,6 +1533,9 @@ spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
15011533
pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
15021534
modes[zti->zti_mode], zti->zti_count,
15031535
zti->zti_value);
1536+
else if (zti->zti_mode == ZTI_MODE_SCALE && zti->zti_value > 0)
1537+
pos += sprintf(&buf[pos], "%s%s,%u", sep,
1538+
modes[zti->zti_mode], zti->zti_value);
15041539
else
15051540
pos += sprintf(&buf[pos], "%s%s", sep,
15061541
modes[zti->zti_mode]);
@@ -1520,9 +1555,10 @@ spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
15201555
{
15211556
char *cfg = kmem_strdup(val);
15221557
int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg);
1523-
kmem_free(cfg, strlen(val)+1);
1558+
kmem_strfree(cfg);
15241559
return (-err);
15251560
}
1561+
15261562
static int
15271563
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
15281564
{
@@ -1534,14 +1570,30 @@ spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
15341570
{
15351571
char *cfg = kmem_strdup(val);
15361572
int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg);
1537-
kmem_free(cfg, strlen(val)+1);
1573+
kmem_strfree(cfg);
15381574
return (-err);
15391575
}
1576+
15401577
static int
15411578
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
15421579
{
15431580
return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
15441581
}
1582+
1583+
static int
1584+
spa_taskq_free_param_set(const char *val, zfs_kernel_param_t *kp)
1585+
{
1586+
char *cfg = kmem_strdup(val);
1587+
int err = spa_taskq_param_set(ZIO_TYPE_FREE, cfg);
1588+
kmem_strfree(cfg);
1589+
return (-err);
1590+
}
1591+
1592+
static int
1593+
spa_taskq_free_param_get(char *buf, zfs_kernel_param_t *kp)
1594+
{
1595+
return (spa_taskq_param_get(ZIO_TYPE_FREE, buf, TRUE));
1596+
}
15451597
#else
15461598
/*
15471599
* On FreeBSD load-time parameters can be set up before malloc() is available,
@@ -1574,6 +1626,19 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
15741626
return (err);
15751627
return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
15761628
}
1629+
1630+
static int
1631+
spa_taskq_free_param(ZFS_MODULE_PARAM_ARGS)
1632+
{
1633+
char buf[SPA_TASKQ_PARAM_MAX];
1634+
int err;
1635+
1636+
(void) spa_taskq_param_get(ZIO_TYPE_FREE, buf, FALSE);
1637+
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
1638+
if (err || req->newptr == NULL)
1639+
return (err);
1640+
return (spa_taskq_param_set(ZIO_TYPE_FREE, buf));
1641+
}
15771642
#endif
15781643
#endif /* _KERNEL */
15791644

@@ -11273,6 +11338,9 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
1127311338
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
1127411339
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW,
1127511340
"Configure IO queues for write IO");
11341+
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_free,
11342+
spa_taskq_free_param_set, spa_taskq_free_param_get, ZMOD_RW,
11343+
"Configure IO queues for free IO");
1127611344
#endif
1127711345

1127811346
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW,

0 commit comments

Comments
 (0)