Skip to content

Commit 98c8c78

Browse files
committed
zvol: Fix blk-mq sync
The zvol blk-mq codepaths would erroneously send FLUSH and TRIM commands down the read codepath, rather than the write codepath. This fixes the issue, and updates the zvol_misc_fua test to verify that sync writes are actually happening.

Fixes: #17761
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
1 parent d147ed7 commit 98c8c78

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

include/os/linux/kernel/linux/blkdev_compat.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,18 @@ static inline int
551551
io_data_dir(struct bio *bio, struct request *rq)
552552
{
553553
if (rq != NULL) {
554-
if (op_is_write(req_op(rq))) {
554+
/*
555+
* Flush & trim requests go down the zvol_write codepath. Or
556+
* more specifically:
557+
*
558+
* If request is a write, or if it's op_is_sync() and not a
559+
* read, or if it's a flush, or if it's a discard, then send the
560+
* request down the write path.
561+
*/
562+
if (op_is_write(rq->cmd_flags) ||
563+
(op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) ||
564+
req_op(rq) == REQ_OP_FLUSH ||
565+
op_is_discard(rq->cmd_flags)) {
555566
return (WRITE);
556567
} else {
557568
return (READ);

tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,53 @@ fi
5050

5151
typeset datafile1="$(mktemp -t zvol_misc_fua1.XXXXXX)"
5252
typeset datafile2="$(mktemp -t zvol_misc_fua2.XXXXXX)"
53+
typeset datafile3="$(mktemp -t zvol_misc_fua3_log.XXXXXX)"
5354
typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
5455

56+
typeset DISK1=${DISKS%% *}
5557
function cleanup
5658
{
# Test teardown: detach the file-backed log vdev from the pool first, then
# delete all three temporary files (the two data files and the log backing
# file), so nothing created by mktemp/truncate is left behind.
57-
rm "$datafile1" "$datafile2"
59+
log_must zpool remove $TESTPOOL $datafile3
60+
rm "$datafile1" "$datafile2" "$datafile3"
61+
}
62+
63+
# Prints the total number of sync writes for a vdev
64+
# $1: vdev
65+
function get_sync
66+
{
# Sum awk fields 4 and 5 over every `zpool iostat -r` output line that ends
# in a digit, then print the total. NOTE(review): fields 4/5 are presumably
# the sync_write ind/agg histogram columns -- confirm against zpool-iostat(8).
67+
zpool iostat -p -H -v -r $TESTPOOL $1 | \
68+
awk '/[0-9]+$/{s+=$4+$5} END{print s}'
5869
}
5970

6071
function do_test {
6172
# Wait for udev to create symlinks to our zvol
6273
block_device_wait $zvolpath
6374

75+
# Write using sync (creates FLUSH calls after writes, but not FUA)
76+
old_vdev_writes=$(get_sync $DISK1)
77+
old_log_writes=$(get_sync $datafile3)
78+
79+
log_must fio --name=write_iops --size=5M \
80+
--ioengine=libaio --verify=0 --bs=4K \
81+
--iodepth=1 --rw=randwrite --group_reporting=1 \
82+
--filename=$zvolpath --sync=1
83+
84+
vdev_writes=$(( $(get_sync $DISK1) - $old_vdev_writes))
85+
log_writes=$(( $(get_sync $datafile3) - $old_log_writes))
86+
87+
# When we're doing sync writes, we should see many more writes go to
88+
# the log vs the first vdev. Experiments show anywhere from a 160-320x
89+
# ratio of writes to the log vs the first vdev (due to some straggler
90+
# writes to the first vdev).
91+
#
92+
# Check that we have a large ratio (100x) of sync writes going to the
93+
# log device
94+
ratio=$(($log_writes / $vdev_writes))
95+
log_note "Got $log_writes log writes, $vdev_writes vdev writes."
96+
if [ $ratio -lt 100 ] ; then
97+
log_fail "Expected > 100x more log writes than vdev writes. "
98+
fi
99+
64100
# Create a data file
65101
log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5
66102

@@ -81,6 +117,8 @@ log_assert "Verify that a ZFS volume can do Force Unit Access (FUA)"
81117
log_onexit cleanup
82118

83119
log_must zfs set compression=off $TESTPOOL/$TESTVOL
120+
log_must truncate -s 100M $datafile3
121+
log_must zpool add $TESTPOOL log $datafile3
84122

85123
log_note "Testing without blk-mq"
86124

0 commit comments

Comments
 (0)