Skip to content

Commit 93b651c

Browse files
committed
ZIO: Set minimum number of free issue threads to 32
Free issue threads might block waiting for synchronous DDT, BRT or GANG header reads. So, unlike other taskqs that use ZTI_SCALE to scale with the number of CPUs, here we also need enough threads to potentially saturate pool reads. I am not sure we always want the 96 threads we had before the introduction of ZTI_SCALE in #11966 on small systems, but let's make it at least 32. While here, make the free taskqs configurable, similar to the read and write ones. Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
1 parent 8c225ff commit 93b651c

File tree

3 files changed

+115
-9
lines changed

3 files changed

+115
-9
lines changed

include/os/freebsd/spl/sys/mod.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@
104104
#define spa_taskq_write_param_set_args(var) \
105105
CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
106106

107+
#define spa_taskq_free_param_set_args(var) \
108+
CTLTYPE_STRING, NULL, 0, spa_taskq_free_param, "A"
109+
107110
#define fletcher_4_param_set_args(var) \
108111
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
109112

man/man4/zfs.4

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2660,12 +2660,50 @@ Set value only applies to pools imported/created after that.
26602660
Set the queue and thread configuration for the IO read queues.
26612661
This is an advanced debugging parameter.
26622662
Don't change this unless you understand what it does.
2663+
Each of the four values corresponds to the issue, issue high-priority,
2664+
interrupt, and interrupt high-priority queues.
2665+
Valid values are
2666+
.Sy fixed,N,M
2667+
(M queues with N threads each),
2668+
.Sy scale[,MIN]
2669+
(scale with CPUs, minimum MIN total threads),
2670+
.Sy sync ,
2671+
and
2672+
.Sy null .
26632673
Set values only apply to pools imported/created after that.
26642674
.
26652675
.It Sy zio_taskq_write Ns = Ns Sy sync null scale null Pq charp
26662676
Set the queue and thread configuration for the IO write queues.
26672677
This is an advanced debugging parameter.
26682678
Don't change this unless you understand what it does.
2679+
Each of the four values corresponds to the issue, issue high-priority,
2680+
interrupt, and interrupt high-priority queues.
2681+
Valid values are
2682+
.Sy fixed,N,M
2683+
(M queues with N threads each),
2684+
.Sy scale[,MIN]
2685+
(scale with CPUs, minimum MIN total threads),
2686+
.Sy sync ,
2687+
and
2688+
.Sy null .
2689+
Set values only apply to pools imported/created after that.
2690+
.
2691+
.It Sy zio_taskq_free Ns = Ns Sy scale,32 null null null Pq charp
2692+
Set the queue and thread configuration for the IO free queues.
2693+
This is an advanced debugging parameter.
2694+
Don't change this unless you understand what it does.
2695+
Each of the four values corresponds to the issue, issue high-priority,
2696+
interrupt, and interrupt high-priority queues.
2697+
Valid values are
2698+
.Sy fixed,N,M
2699+
(M queues with N threads each),
2700+
.Sy scale[,MIN]
2701+
(scale with CPUs, minimum MIN total threads),
2702+
.Sy sync ,
2703+
and
2704+
.Sy null .
2705+
The default uses a minimum of 32 threads to improve parallelism for
2706+
DDT and BRT metadata operations during frees.
26692707
Set values only apply to pools imported/created after that.
26702708
.
26712709
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint

module/zfs/spa.c

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ typedef enum zti_modes {
141141

142142
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
143143
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
144-
#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
144+
#define ZTI_SCALE(min) { ZTI_MODE_SCALE, (min), 1 }
145145
#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
146146
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
147147

@@ -180,13 +180,13 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
180180
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
181181
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
182182
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
183-
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
183+
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* READ */
184184
#ifdef illumos
185-
{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
185+
{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE(0), ZTI_N(5) }, /* WRITE */
186186
#else
187-
{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */
187+
{ ZTI_SYNC, ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* WRITE */
188188
#endif
189-
{ ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
189+
{ ZTI_SCALE(32), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
190190
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
191191
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */
192192
{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */
@@ -1207,6 +1207,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
12071207
* less for better request ordering and CPU utilization.
12081208
*/
12091209
cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
1210+
cpus = MAX(cpus, value);
12101211
if (zio_taskq_batch_tpq > 0) {
12111212
count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) /
12121213
zio_taskq_batch_tpq);
@@ -1232,9 +1233,15 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
12321233
while (count * count > cpus)
12331234
count--;
12341235
}
1235-
/* Limit each taskq within 100% to not trigger assertion. */
1236-
count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
1237-
value = (zio_taskq_batch_pct + count / 2) / count;
1236+
/*
1237+
* Calculate percentage per taskq to achieve 'cpus' threads
1238+
* total. The percentage is relative to boot_ncpus.
1239+
* Ensure we have enough taskqs to avoid exceeding 100%.
1240+
*/
1241+
count = MAX(count, (cpus + boot_ncpus - 1) / boot_ncpus);
1242+
value = (cpus * 100 + boot_ncpus * count / 2) /
1243+
(boot_ncpus * count);
1244+
value = MIN(value, 100);
12381245
break;
12391246

12401247
case ZTI_MODE_NULL:
@@ -1433,8 +1440,30 @@ spa_taskq_param_set(zio_type_t t, char *cfg)
14331440
break;
14341441
}
14351442

1443+
/*
1444+
* SCALE is optionally parameterised by minimum number of
1445+
* threads.
1446+
*/
14361447
case ZTI_MODE_SCALE: {
1437-
const zio_taskq_info_t zti = ZTI_SCALE;
1448+
unsigned long long mint = 0;
1449+
if (c != NULL && *c != '\0') {
1450+
/* Need a number */
1451+
if (!(isdigit(*c)))
1452+
break;
1453+
tok = c;
1454+
1455+
/* Take digits */
1456+
err = ddi_strtoull(tok, &tok, 10, &mint);
1457+
/* Must succeed, and moved forward */
1458+
if (err != 0 || tok == c || *tok != '\0')
1459+
break;
1460+
1461+
/* Sanity check */
1462+
if (mint >= 16384)
1463+
break;
1464+
}
1465+
1466+
const zio_taskq_info_t zti = ZTI_SCALE(mint);
14381467
row[q] = zti;
14391468
break;
14401469
}
@@ -1501,6 +1530,9 @@ spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
15011530
pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
15021531
modes[zti->zti_mode], zti->zti_count,
15031532
zti->zti_value);
1533+
else if (zti->zti_mode == ZTI_MODE_SCALE && zti->zti_value > 0)
1534+
pos += sprintf(&buf[pos], "%s%s,%u", sep,
1535+
modes[zti->zti_mode], zti->zti_value);
15041536
else
15051537
pos += sprintf(&buf[pos], "%s%s", sep,
15061538
modes[zti->zti_mode]);
@@ -1523,6 +1555,7 @@ spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
15231555
kmem_free(cfg, strlen(val)+1);
15241556
return (-err);
15251557
}
1558+
15261559
static int
15271560
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
15281561
{
@@ -1537,11 +1570,27 @@ spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
15371570
kmem_free(cfg, strlen(val)+1);
15381571
return (-err);
15391572
}
1573+
15401574
static int
15411575
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
15421576
{
15431577
return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
15441578
}
1579+
1580+
static int
1581+
spa_taskq_free_param_set(const char *val, zfs_kernel_param_t *kp)
1582+
{
1583+
char *cfg = kmem_strdup(val);
1584+
int err = spa_taskq_param_set(ZIO_TYPE_FREE, cfg);
1585+
kmem_free(cfg, strlen(val)+1);
1586+
return (-err);
1587+
}
1588+
1589+
static int
1590+
spa_taskq_free_param_get(char *buf, zfs_kernel_param_t *kp)
1591+
{
1592+
return (spa_taskq_param_get(ZIO_TYPE_FREE, buf, TRUE));
1593+
}
15451594
#else
15461595
/*
15471596
* On FreeBSD load-time parameters can be set up before malloc() is available,
@@ -1574,6 +1623,19 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
15741623
return (err);
15751624
return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
15761625
}
1626+
1627+
static int
1628+
spa_taskq_free_param(ZFS_MODULE_PARAM_ARGS)
1629+
{
1630+
char buf[SPA_TASKQ_PARAM_MAX];
1631+
int err;
1632+
1633+
(void) spa_taskq_param_get(ZIO_TYPE_FREE, buf, FALSE);
1634+
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
1635+
if (err || req->newptr == NULL)
1636+
return (err);
1637+
return (spa_taskq_param_set(ZIO_TYPE_FREE, buf));
1638+
}
15771639
#endif
15781640
#endif /* _KERNEL */
15791641

@@ -11273,6 +11335,9 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
1127311335
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
1127411336
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW,
1127511337
"Configure IO queues for write IO");
11338+
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_free,
11339+
spa_taskq_free_param_set, spa_taskq_free_param_get, ZMOD_RW,
11340+
"Configure IO queues for free IO");
1127611341
#endif
1127711342

1127811343
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW,

0 commit comments

Comments
 (0)