From d7b5ba84c432a7be04da52d4fa50f44ddc07554b Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 28 Jul 2023 01:37:56 +0000 Subject: [PATCH 01/71] module/bdev/raid: Add RAID5 implementation --- module/bdev/raid/Makefile | 2 +- module/bdev/raid/bdev_raid.c | 2 + module/bdev/raid/bdev_raid.h | 1 + module/bdev/raid/raid5.c | 692 +++++++++++++++++++++++++++++++++++ 4 files changed, 696 insertions(+), 1 deletion(-) create mode 100644 module/bdev/raid/raid5.c diff --git a/module/bdev/raid/Makefile b/module/bdev/raid/Makefile index 17ad7f41938..22ad8c0003e 100644 --- a/module/bdev/raid/Makefile +++ b/module/bdev/raid/Makefile @@ -10,7 +10,7 @@ SO_VER := 5 SO_MINOR := 0 CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ -C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c raid1.c concat.c +C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c raid1.c concat.c raid5.c ifeq ($(CONFIG_RAID5F),y) C_SRCS += raid5f.c diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index d1c8ea8e240..5c14f357016 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -763,6 +763,8 @@ static struct { { "0", RAID0 }, { "raid1", RAID1 }, { "1", RAID1 }, + { "raid5", RAID5 }, + { "5", RAID5 }, { "raid5f", RAID5F }, { "5f", RAID5F }, { "concat", CONCAT }, diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index c6e31ea2ca2..708076c1895 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -13,6 +13,7 @@ enum raid_level { INVALID_RAID_LEVEL = -1, RAID0 = 0, RAID1 = 1, + RAID5 = 5, RAID5F = 95, /* 0x5f */ CONCAT = 99, }; diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c new file mode 100644 index 00000000000..c6c25fb208b --- /dev/null +++ b/module/bdev/raid/raid5.c @@ -0,0 +1,692 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2019 Intel Corporation. + * All rights reserved. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ */ + +#include "bdev_raid.h" + +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "lib/thread/thread_internal.h" + +#include "spdk/log.h" + +static inline uint8_t +raid5_parity_strip_index(struct raid_bdev *raid_bdev, uint64_t stripe_index) { + return raid_bdev->num_base_bdevs - 1 - stripe_index % raid_bdev->num_base_bdevs; +} + +static inline struct iovec * +raid5_get_buffer(size_t iovlen) { + struct iovec *buffer; + + buffer = calloc(1, sizeof(*buffer)); + if (buffer == NULL) { + return NULL; + } + + buffer->iov_len = iovlen; + buffer->iov_base = calloc(buffer->iov_len, sizeof(char)); + if (buffer->iov_base == NULL) { + free(buffer); + return NULL; + } + + return buffer; +} + +static inline void +raid5_free_buffer(struct iovec *buffer) { + free(buffer->iov_base); + free(buffer); +} + +static void +raid5_fill_buffer_with_zeroes(struct iovec *buffer) { + uint64_t *b8 = buffer->iov_base; + char *b = buffer->iov_base; + size_t len8 = buffer->iov_len / 8; + size_t len = buffer->iov_len; + + for (size_t i=0; i < len8; ++i) { + b8[i] = 0; + } + + len8 *= 8; + for (size_t i = len8; i < len; ++i) { + b[i] = 0; + } +} + +static void +raid5_xor_buffers(struct iovec *xor_res, struct iovec *buffer) { + uint64_t *xb8 = xor_res->iov_base; + uint64_t *b8 = buffer->iov_base; + char *xb = xor_res->iov_base; + char *b = buffer->iov_base; + size_t len8 = xor_res->iov_len / 8; + size_t len = xor_res->iov_len; + + for (size_t i=0; i < len8; ++i) { + xb8[i] ^= b8[i]; + } + + len8 *= 8; + for (size_t i = len8; i < len; ++i) { + xb[i] ^= b[i]; + } +} + +static void +raid5_copy_iovec(struct iovec *dst, struct iovec *src) { + uint64_t *db8 = dst->iov_base; + uint64_t *sb8 = src->iov_base; + char *db = dst->iov_base; + char *sb = src->iov_base; + size_t len8 = dst->iov_len / 8; + size_t len = dst->iov_len; + + for (size_t i=0; i < len8; ++i) { + db8[i] = sb8[i]; + } + + len8 *= 8; + for (size_t i = len8; i < len; ++i) { + db[i] = 
sb[i]; + } +} + +static void +raid5_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid_bdev_io *raid_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + raid_bdev_io_complete_part(raid_io, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + +static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); + +static void +_raid5_submit_rw_request(void *_raid_io) +{ + struct raid_bdev_io *raid_io = _raid_io; + + raid5_submit_rw_request(raid_io); +} + +static int +raid5_submit_read_request(struct raid_bdev_io *raid_io) { + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct spdk_bdev_ext_io_opts io_opts = {}; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; + uint64_t stripe_index; + uint64_t parity_strip_idx; + uint64_t req_bdev_idx; + uint32_t offset_in_strip; + uint64_t offset_blocks; + uint64_t num_blocks; + int ret = 0; + uint64_t start_strip_idx; + uint64_t end_strip_idx; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + + start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + if (start_strip_idx != end_strip_idx) { + SPDK_ERRLOG("I/O spans strip boundary!\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return -EINVAL; + } + + assert(raid_ch != NULL); + assert(raid_ch->base_channel); + + io_opts.size = sizeof(io_opts); + io_opts.memory_domain = bdev_io->u.bdev.memory_domain; + io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; + io_opts.metadata = bdev_io->u.bdev.md_buf; + + stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); + parity_strip_idx = raid5_parity_strip_index(raid_bdev, 
stripe_index); + offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); + + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); + if (req_bdev_idx >= parity_strip_idx) { + ++req_bdev_idx; + } + offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; + num_blocks = bdev_io->u.bdev.num_blocks; + + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; + + if (base_ch != NULL) { + // reading only one strip case + raid_io->base_bdev_io_remaining = 1; + + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + + if (ret == -ENOMEM) { + SPDK_ERRLOG("ENOMEM on reading request in RAID5\n"); + assert(false); + + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + return 0; + } + + return ret; + } else { + // reading stripe case + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; + } + + struct iovec *buffer = raid5_get_buffer(num_blocks * block_size_b); + if (buffer == NULL) { + return -ENOMEM; + } + + struct iovec *xor_res = raid5_get_buffer(num_blocks * block_size_b); + if (xor_res == NULL) { + raid5_free_buffer(buffer); + return -ENOMEM; + } + + raid5_fill_buffer_with_zeroes(xor_res); + + uint8_t num_base_bdevs = raid_bdev->num_base_bdevs; + + for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + if (base_ch == NULL) { + if (idx == req_bdev_idx) { + continue; + } else { + SPDK_ERRLOG("2 broken strips\n"); + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return -EIO; + } + } + + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + buffer, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, 
&io_opts); + + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on read request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + + raid5_xor_buffers(xor_res, buffer); + + raid_io->base_bdev_io_submitted++; + } + + // copying result to request iovec + raid5_copy_iovec(bdev_io->u.bdev.iovs, xor_res); + + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); + + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + + return 0; + } +} + +static int +raid5_submit_write_request(struct raid_bdev_io *raid_io) { + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct spdk_bdev_ext_io_opts io_opts = {}; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; + uint8_t num_base_bdevs = raid_bdev->num_base_bdevs; + uint8_t broken_bdev_idx = num_base_bdevs; + uint64_t stripe_index; + uint64_t parity_strip_idx; + uint64_t req_bdev_idx; + uint32_t offset_in_strip; + uint64_t offset_blocks; + uint64_t num_blocks; + int ret = 0; + uint64_t start_strip_idx; + uint64_t end_strip_idx; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + + start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + if (start_strip_idx != end_strip_idx) { + SPDK_ERRLOG("I/O spans strip boundary!\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return -EINVAL; + } + + assert(raid_ch != NULL); + assert(raid_ch->base_channel); + + io_opts.size = sizeof(io_opts); + 
io_opts.memory_domain = bdev_io->u.bdev.memory_domain; + io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; + io_opts.metadata = bdev_io->u.bdev.md_buf; + + stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); + parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); + offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); + + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); + if (req_bdev_idx >= parity_strip_idx) { + ++req_bdev_idx; + } + offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; + num_blocks = bdev_io->u.bdev.num_blocks; + parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); + + // calculating of broken strip idx + for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { + if (raid_ch->base_channel[idx] == NULL) { + if (broken_bdev_idx == num_base_bdevs) { + broken_bdev_idx = idx; + } else { + SPDK_ERRLOG("2 broken strips\n"); + return -EIO; + } + } + } + + if (broken_bdev_idx == parity_strip_idx) { + raid_io->base_bdev_io_remaining = 1; + + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; + + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + + if (ret == -ENOMEM) { + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + return 0; + } + return ret; + } else { + struct iovec *buffer = raid5_get_buffer(num_blocks * block_size_b); + if (buffer == NULL) { + return -ENOMEM; + } + + struct iovec *xor_res = raid5_get_buffer(num_blocks * block_size_b); + if (xor_res == NULL) { + raid5_free_buffer(buffer); + return -ENOMEM; + } + + if (broken_bdev_idx != req_bdev_idx && broken_bdev_idx != num_base_bdevs) { + 
raid_io->base_bdev_io_remaining = 4; + + // reading + + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = raid_ch->base_channel[parity_strip_idx]; + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + xor_res, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + raid_io->base_bdev_io_submitted++; + + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + buffer, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + raid_io->base_bdev_io_submitted++; + + // new parity calculation + + raid5_xor_buffers(xor_res, buffer); + raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + + // writing + + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, 
_raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + raid_io->base_bdev_io_submitted++; + + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = raid_ch->base_channel[parity_strip_idx]; + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + xor_res, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + raid_io->base_bdev_io_submitted++; + } else { + if (broken_bdev_idx == req_bdev_idx) { + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; + } + raid5_fill_buffer_with_zeroes(xor_res); + + for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + if (idx == parity_strip_idx || idx == req_bdev_idx) { + continue; + } + + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + buffer, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + + raid5_xor_buffers(xor_res, buffer); + + raid_io->base_bdev_io_submitted++; + } + + raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = 
raid_ch->base_channel[parity_strip_idx]; + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + xor_res, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + } else { + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; + } + raid5_fill_buffer_with_zeroes(xor_res); + + for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + if (idx == parity_strip_idx || idx == req_bdev_idx) { + continue; + } + + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + buffer, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + + raid5_xor_buffers(xor_res, buffer); + + raid_io->base_bdev_io_submitted++; + } + + raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + 
raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = raid_ch->base_channel[parity_strip_idx]; + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + xor_res, 1, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + if (ret != 0) { + if (ret == -ENOMEM) { + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + assert(false); + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + return ret; + } + } + } + raid5_free_buffer(buffer); + raid5_free_buffer(xor_res); + + return 0; + } +} + +static void +raid5_submit_rw_request(struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + int ret = 0; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + ret = raid5_submit_read_request(raid_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + ret = raid5_submit_write_request(raid_io); + break; + default: + ret = -EINVAL; + break; + } + + if (ret != 0) { + SPDK_ERRLOG("bdev io submit error, it should not happen\n"); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + assert(false); + } +} + +static uint64_t +raid5_calculate_blockcnt(struct raid_bdev *raid_bdev) +{ + uint64_t min_blockcnt = UINT64_MAX; + struct raid_base_bdev_info *base_info; + uint64_t total_stripes; + uint64_t stripe_blockcnt; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + min_blockcnt = spdk_min(min_blockcnt, spdk_bdev_desc_get_bdev(base_info->desc)->blockcnt); + } + + total_stripes = min_blockcnt / raid_bdev->strip_size; + 
stripe_blockcnt = raid_bdev->strip_size * (raid_bdev->num_base_bdevs - 1); + + SPDK_DEBUGLOG(bdev_raid5, "min blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n", + min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); + + return total_stripes * stripe_blockcnt; +} + +static int +raid5_start(struct raid_bdev *raid_bdev) +{ + raid_bdev->bdev.blockcnt = raid5_calculate_blockcnt(raid_bdev); + raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; + raid_bdev->bdev.split_on_optimal_io_boundary = true; + raid_bdev->min_base_bdevs_operational = raid_bdev->num_base_bdevs - 1; + + return 0; +} + +static void +raid5_resize(struct raid_bdev *raid_bdev) +{ + uint64_t blockcnt; + int rc; + + blockcnt = raid5_calculate_blockcnt(raid_bdev); + + if (blockcnt == raid_bdev->bdev.blockcnt) { + return; + } + + SPDK_NOTICELOG("raid5 '%s': min blockcount was changed from %" PRIu64 " to %" PRIu64 "\n", + raid_bdev->bdev.name, + raid_bdev->bdev.blockcnt, + blockcnt); + + rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt); + if (rc != 0) { + SPDK_ERRLOG("Failed to notify blockcount change\n"); + } +} + +static struct raid_bdev_module g_raid5_module = { + .level = RAID5, + .base_bdevs_min = 3, + .memory_domains_supported = true, + .start = raid5_start, + .submit_rw_request = raid5_submit_rw_request, + .resize = raid5_resize +}; +RAID_MODULE_REGISTER(&g_raid5_module) + +SPDK_LOG_REGISTER_COMPONENT(bdev_raid5) From 2043b9210ed24b76ef095c8776d547b7337e5b10 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Sat, 16 Sep 2023 06:15:09 +0000 Subject: [PATCH 02/71] module/bdev/raid: Add request async processing. module/bdev/raid: Add async processing of request to strip for RAID5. 
--- module/bdev/raid/raid5.c | 945 +++++++++++++++++++++++++-------------- 1 file changed, 608 insertions(+), 337 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index c6c25fb208b..a7e1017b3b8 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -14,13 +14,27 @@ #include "spdk/log.h" +struct raid5_io_buffer { + struct raid_bdev_io *raid_io; + + struct iovec *buffer; +}; + +struct raid5_write_request_buffer { + struct raid5_io_buffer *wr_xor_buff; + + struct iovec *buffer; +}; + static inline uint8_t -raid5_parity_strip_index(struct raid_bdev *raid_bdev, uint64_t stripe_index) { +raid5_parity_strip_index(struct raid_bdev *raid_bdev, uint64_t stripe_index) +{ return raid_bdev->num_base_bdevs - 1 - stripe_index % raid_bdev->num_base_bdevs; } static inline struct iovec * -raid5_get_buffer(size_t iovlen) { +raid5_get_buffer(size_t iovlen) +{ struct iovec *buffer; buffer = calloc(1, sizeof(*buffer)); @@ -39,66 +53,137 @@ raid5_get_buffer(size_t iovlen) { } static inline void -raid5_free_buffer(struct iovec *buffer) { +raid5_free_buffer(struct iovec *buffer) +{ free(buffer->iov_base); free(buffer); } -static void -raid5_fill_buffer_with_zeroes(struct iovec *buffer) { - uint64_t *b8 = buffer->iov_base; - char *b = buffer->iov_base; - size_t len8 = buffer->iov_len / 8; - size_t len = buffer->iov_len; +static inline struct raid5_io_buffer * +raid5_get_io_buffer(struct raid_bdev_io *raid_io, size_t data_len) +{ + struct raid5_io_buffer *io_buffer; - for (size_t i=0; i < len8; ++i) { - b8[i] = 0; + io_buffer = calloc(1, sizeof(struct raid5_io_buffer)); + if (io_buffer == NULL) { + return NULL; } - len8 *= 8; - for (size_t i = len8; i < len; ++i) { - b[i] = 0; + io_buffer->buffer = raid5_get_buffer(data_len); + if (io_buffer->buffer == NULL) { + free(io_buffer); + return NULL; } + + io_buffer->raid_io = raid_io; + return io_buffer; } -static void -raid5_xor_buffers(struct iovec *xor_res, struct iovec *buffer) { +static inline 
void +raid5_free_io_buffer(struct raid5_io_buffer *io_buffer) +{ + raid5_free_buffer(io_buffer->buffer); + free(io_buffer); +} + +static inline struct raid5_write_request_buffer * +raid5_get_write_request_buffer(struct raid5_io_buffer *wr_xor_buff, size_t data_len) +{ + struct raid5_write_request_buffer *wr_buffer; + + wr_buffer = calloc(1, sizeof(struct raid5_write_request_buffer)); + if (wr_buffer == NULL) { + return NULL; + } + + wr_buffer->buffer = raid5_get_buffer(data_len); + if (wr_buffer->buffer == NULL) { + free(wr_buffer); + return NULL; + } + + wr_buffer->wr_xor_buff = wr_xor_buff; + return wr_buffer; +} + +static inline void +raid5_free_write_request_buffer(struct raid5_write_request_buffer *wr_buffer) +{ + raid5_free_buffer(wr_buffer->buffer); + free(wr_buffer); +} + +static inline void +raid5_xor_buffers(struct iovec *xor_res, struct iovec *buffer) +{ uint64_t *xb8 = xor_res->iov_base; uint64_t *b8 = buffer->iov_base; - char *xb = xor_res->iov_base; - char *b = buffer->iov_base; size_t len8 = xor_res->iov_len / 8; - size_t len = xor_res->iov_len; for (size_t i=0; i < len8; ++i) { xb8[i] ^= b8[i]; } +} - len8 *= 8; - for (size_t i = len8; i < len; ++i) { - xb[i] ^= b[i]; +static inline void +raid5_xor_iovs_with_buffer(struct iovec *iovs, int iovcnt, struct iovec *buffer) +{ + uint64_t *xb8; + uint64_t *b8 = buffer->iov_base; + size_t b8i = 0; + size_t len8; + + for (int iovidx = 0; iovidx < iovcnt; ++iovidx) { + xb8 = iovs[iovidx].iov_base; + len8 = iovs[iovidx].iov_len / 8; + for (size_t i = 0; i < len8; ++i, ++b8i) { + xb8[i] ^= b8[b8i]; + } } } -static void -raid5_copy_iovec(struct iovec *dst, struct iovec *src) { - uint64_t *db8 = dst->iov_base; - uint64_t *sb8 = src->iov_base; - char *db = dst->iov_base; - char *sb = src->iov_base; - size_t len8 = dst->iov_len / 8; - size_t len = dst->iov_len; - - for (size_t i=0; i < len8; ++i) { - db8[i] = sb8[i]; +static inline void +raid5_xor_buffer_with_iovs(struct iovec *buffer, struct iovec *iovs, int 
iovcnt) +{ + uint64_t *xb8 = buffer->iov_base; + uint64_t *b8; + size_t xb8i = 0; + size_t len8; + + for (int iovidx = 0; iovidx < iovcnt; ++iovidx) { + b8 = iovs[iovidx].iov_base; + len8 = iovs[iovidx].iov_len / 8; + for (size_t i = 0; i < len8; ++i, ++xb8i) { + xb8[xb8i] ^= b8[i]; + } } +} - len8 *= 8; - for (size_t i = len8; i < len; ++i) { - db[i] = sb[i]; +static inline void +raid5_fill_iovs_with_zeroes(struct iovec *iovs, int iovcnt) +{ + uint64_t *b8; + size_t len8; + + for (int iovidx = 0; iovidx < iovcnt; ++iovidx) { + b8 = iovs[iovidx].iov_base; + len8 = iovs[iovidx].iov_len / 8; + for (size_t i = 0; i < len8; ++i) { + b8[i] = 0; + } } } +void +raid5_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, + struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg) +{ + raid_io->waitq_entry.bdev = bdev; + raid_io->waitq_entry.cb_fn = cb_fn; + raid_io->waitq_entry.cb_arg = cb_arg; + spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); +} + static void raid5_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { @@ -106,11 +191,128 @@ raid5_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_ar spdk_bdev_free_io(bdev_io); - raid_bdev_io_complete_part(raid_io, 1, success ? + raid_bdev_io_complete(raid_io, success ? 
SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED); } +static void +raid5_read_request_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid5_io_buffer *io_buffer = cb_arg; + struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(io_buffer->raid_io); + + spdk_bdev_free_io(bdev_io); + + assert(io_buffer->raid_io->base_bdev_io_remaining > 0); + io_buffer->raid_io->base_bdev_io_remaining--; + + if (!success) { + io_buffer->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + raid5_xor_iovs_with_buffer(rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt, + io_buffer->buffer); + } + + if (io_buffer->raid_io->base_bdev_io_remaining == 0) { + raid_bdev_io_complete(io_buffer->raid_io, + io_buffer->raid_io->base_bdev_io_status); + } + + raid5_free_io_buffer(io_buffer); +} + +static void raid5_submit_write_request_writing(struct raid5_io_buffer *io_buffer); + +static void +raid5_write_request_reading_complete_part (struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid5_write_request_buffer *wr_buffer = cb_arg; + struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(wr_buffer->wr_xor_buff->raid_io); + + spdk_bdev_free_io(bdev_io); + + assert(wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining > 0); + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining--; + + if (!success) { + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + raid5_xor_buffers(wr_buffer->wr_xor_buff->buffer, wr_buffer->buffer); + } + + if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining == 0) { + if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { + raid5_xor_buffer_with_iovs(wr_buffer->wr_xor_buff->buffer, + rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt); + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_submitted = 1; + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining = 1; + 
raid5_submit_write_request_writing(wr_buffer->wr_xor_buff); + } else { + raid_bdev_io_complete(wr_buffer->wr_xor_buff->raid_io, + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status); + raid5_free_io_buffer(wr_buffer->wr_xor_buff); + } + } + + raid5_free_write_request_buffer(wr_buffer); +} + +static void +raid5_write_request_reading_with_writing_req_strip_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid5_write_request_buffer *wr_buffer = cb_arg; + struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(wr_buffer->wr_xor_buff->raid_io); + + spdk_bdev_free_io(bdev_io); + + assert(wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining > 0); + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining--; + + if (!success) { + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + raid5_xor_buffers(wr_buffer->wr_xor_buff->buffer, wr_buffer->buffer); + } + + if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining == 0) { + if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { + raid5_xor_buffer_with_iovs(wr_buffer->wr_xor_buff->buffer, + rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt); + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_submitted = 0; + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining = 2; + raid5_submit_write_request_writing(wr_buffer->wr_xor_buff); + } else { + raid_bdev_io_complete(wr_buffer->wr_xor_buff->raid_io, + wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status); + raid5_free_io_buffer(wr_buffer->wr_xor_buff); + } + } + + raid5_free_write_request_buffer(wr_buffer); +} + +static void +raid5_write_request_writing_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid5_io_buffer *io_buffer = cb_arg; + + spdk_bdev_free_io(bdev_io); + + assert(io_buffer->raid_io->base_bdev_io_remaining > 0); + io_buffer->raid_io->base_bdev_io_remaining--; + + if (!success) { + 
io_buffer->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } + + if (io_buffer->raid_io->base_bdev_io_remaining == 0) { + raid_bdev_io_complete(io_buffer->raid_io, + io_buffer->raid_io->base_bdev_io_status); + raid5_free_io_buffer(io_buffer); + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void @@ -121,8 +323,9 @@ _raid5_submit_rw_request(void *_raid_io) raid5_submit_rw_request(raid_io); } -static int -raid5_submit_read_request(struct raid_bdev_io *raid_io) { +static void +raid5_submit_read_request(struct raid_bdev_io *raid_io) +{ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); struct spdk_bdev_ext_io_opts io_opts = {}; struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; @@ -147,7 +350,7 @@ raid5_submit_read_request(struct raid_bdev_io *raid_io) { SPDK_ERRLOG("I/O spans strip boundary!\n"); assert(false); raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - return -EINVAL; + return; } assert(raid_ch != NULL); @@ -161,7 +364,7 @@ raid5_submit_read_request(struct raid_bdev_io *raid_io) { stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); if (req_bdev_idx >= parity_strip_idx) { ++req_bdev_idx; @@ -173,8 +376,7 @@ raid5_submit_read_request(struct raid_bdev_io *raid_io) { base_ch = raid_ch->base_channel[req_bdev_idx]; if (base_ch != NULL) { - // reading only one strip case - raid_io->base_bdev_io_remaining = 1; + // case: reading only one strip ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, @@ -182,37 +384,31 @@ raid5_submit_read_request(struct raid_bdev_io *raid_io) { raid_io, &io_opts); if (ret == -ENOMEM) { - SPDK_ERRLOG("ENOMEM on reading request in RAID5\n"); - assert(false); - 
raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), base_ch, _raid5_submit_rw_request); - return 0; + } else if (ret != 0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); } - - return ret; } else { - // reading stripe case - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; - } + // case: broken request strip - struct iovec *buffer = raid5_get_buffer(num_blocks * block_size_b); - if (buffer == NULL) { - return -ENOMEM; - } + uint8_t start_idx; - struct iovec *xor_res = raid5_get_buffer(num_blocks * block_size_b); - if (xor_res == NULL) { - raid5_free_buffer(buffer); - return -ENOMEM; + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; + raid5_fill_iovs_with_zeroes(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt); } - raid5_fill_buffer_with_zeroes(xor_res); + start_idx = raid_io->base_bdev_io_submitted; + if (req_bdev_idx <= start_idx) { + start_idx++; + } - uint8_t num_base_bdevs = raid_bdev->num_base_bdevs; + for (uint8_t idx = start_idx; idx < raid_bdev->num_base_bdevs; ++idx) { + struct raid5_io_buffer *io_buffer; - for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { base_info = &raid_bdev->base_bdev_info[idx]; base_ch = raid_ch->base_channel[idx]; if (base_ch == NULL) { @@ -220,58 +416,72 @@ raid5_submit_read_request(struct raid_bdev_io *raid_io) { continue; } else { SPDK_ERRLOG("2 broken strips\n"); - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return -EIO; + assert(false); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - + (raid_bdev->num_base_bdevs - 1); + if (raid_io->base_bdev_io_remaining == 0) { + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } + return; } } + 
io_buffer = raid5_get_io_buffer(raid_io, num_blocks * block_size_b); + if (io_buffer == NULL) { + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + return; + } + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - buffer, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); + io_buffer->buffer, 1, + offset_blocks, num_blocks, raid5_read_request_complete_part, + io_buffer, &io_opts); if (ret != 0) { + raid5_free_io_buffer(io_buffer); if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on read request in RAID5\n"); - assert(false); raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), base_ch, _raid5_submit_rw_request); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - + (raid_bdev->num_base_bdevs - 1); + if (raid_io->base_bdev_io_remaining == 0) { + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } } - - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } - - raid5_xor_buffers(xor_res, buffer); + return; + } raid_io->base_bdev_io_submitted++; } + } +} - // copying result to request iovec - raid5_copy_iovec(bdev_io->u.bdev.iovs, xor_res); - - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); +static void raid5_submit_write_request_reading(struct raid5_io_buffer *wr_xor_buff); - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); +static void +_raid5_submit_write_request_reading(void *_wr_xor_buff) +{ + struct raid5_io_buffer *wr_xor_buff = _wr_xor_buff; - return 0; - } + raid5_submit_write_request_reading(wr_xor_buff); } -static int -raid5_submit_write_request(struct raid_bdev_io *raid_io) { +static void 
+raid5_submit_write_request_reading(struct raid5_io_buffer *wr_xor_buff) +{ + struct raid_bdev_io *raid_io = wr_xor_buff->raid_io; struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); struct spdk_bdev_ext_io_opts io_opts = {}; struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; struct raid_bdev *raid_bdev = raid_io->raid_bdev; uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; - uint8_t num_base_bdevs = raid_bdev->num_base_bdevs; - uint8_t broken_bdev_idx = num_base_bdevs; + uint8_t broken_bdev_idx = raid_bdev->num_base_bdevs; uint64_t stripe_index; uint64_t parity_strip_idx; uint64_t req_bdev_idx; @@ -280,22 +490,11 @@ raid5_submit_write_request(struct raid_bdev_io *raid_io) { uint64_t num_blocks; int ret = 0; uint64_t start_strip_idx; - uint64_t end_strip_idx; struct raid_base_bdev_info *base_info; struct spdk_io_channel *base_ch; + struct raid5_write_request_buffer *wr_buffer; start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> - raid_bdev->strip_size_shift; - if (start_strip_idx != end_strip_idx) { - SPDK_ERRLOG("I/O spans strip boundary!\n"); - assert(false); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - return -EINVAL; - } - - assert(raid_ch != NULL); - assert(raid_ch->base_channel); io_opts.size = sizeof(io_opts); io_opts.memory_domain = bdev_io->u.bdev.memory_domain; @@ -305,297 +504,375 @@ raid5_submit_write_request(struct raid_bdev_io *raid_io) { stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); if (req_bdev_idx >= parity_strip_idx) { ++req_bdev_idx; } offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; num_blocks 
= bdev_io->u.bdev.num_blocks; - parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); - + // calculating of broken strip idx - for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { + for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { if (raid_ch->base_channel[idx] == NULL) { - if (broken_bdev_idx == num_base_bdevs) { + if (broken_bdev_idx == raid_bdev->num_base_bdevs) { broken_bdev_idx = idx; } else { SPDK_ERRLOG("2 broken strips\n"); - return -EIO; + assert(false); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + if (raid_io->base_bdev_io_submitted == 0) { + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } + return; } } } - if (broken_bdev_idx == parity_strip_idx) { - raid_io->base_bdev_io_remaining = 1; + if (broken_bdev_idx != req_bdev_idx && broken_bdev_idx != raid_bdev->num_base_bdevs) { + // case: broken strip isn't request strip or parity strip - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = 2; + } - if (ret == -ENOMEM) { - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); + switch (raid_io->base_bdev_io_submitted) { + case 0: + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = raid_ch->base_channel[parity_strip_idx]; + + wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * block_size_b); + if (wr_buffer == NULL) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + return; + } - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - return 0; + ret = 
spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + wr_buffer->buffer, 1, + offset_blocks, num_blocks, raid5_write_request_reading_with_writing_req_strip_complete_part, + wr_buffer, &io_opts); + + if (ret != 0) { + raid5_free_write_request_buffer(wr_buffer); + if (ret == -ENOMEM) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid5_free_io_buffer(wr_xor_buff); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + } + return; + } + raid_io->base_bdev_io_submitted++; + case 1: + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; + + wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * block_size_b); + if (wr_buffer == NULL) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + return; + } + + ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, + wr_buffer->buffer, 1, + offset_blocks, num_blocks, raid5_write_request_reading_with_writing_req_strip_complete_part, + wr_buffer, &io_opts); + + if (ret != 0) { + raid5_free_write_request_buffer(wr_buffer); + if (ret == -ENOMEM) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - 2; + if (raid_io->base_bdev_io_remaining == 0) { + raid5_free_io_buffer(wr_xor_buff); + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } + } + return; + } + raid_io->base_bdev_io_submitted++; } - return ret; } else 
{ - struct iovec *buffer = raid5_get_buffer(num_blocks * block_size_b); - if (buffer == NULL) { - return -ENOMEM; + // cases with reading stripe + + uint8_t start_idx; + spdk_bdev_io_completion_cb cb; + + if (broken_bdev_idx == req_bdev_idx) { + // case: broken request strip + cb = raid5_write_request_reading_complete_part; + } else { + // case: without broken strip + cb = raid5_write_request_reading_with_writing_req_strip_complete_part; } - struct iovec *xor_res = raid5_get_buffer(num_blocks * block_size_b); - if (xor_res == NULL) { - raid5_free_buffer(buffer); - return -ENOMEM; + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 2; } - if (broken_bdev_idx != req_bdev_idx && broken_bdev_idx != num_base_bdevs) { - raid_io->base_bdev_io_remaining = 4; - - // reading + start_idx = raid_io->base_bdev_io_submitted; + if (req_bdev_idx <= start_idx || parity_strip_idx <= start_idx) { + start_idx++; + if (req_bdev_idx <= start_idx && parity_strip_idx <= start_idx) { + start_idx++; + } + } - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - xor_res, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; + for (uint8_t idx = start_idx; idx < raid_bdev->num_base_bdevs; ++idx) { + if (idx == req_bdev_idx || idx == parity_strip_idx) { + continue; + } + + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * 
block_size_b); + if (wr_buffer == NULL) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + return; } - raid_io->base_bdev_io_submitted++; - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - buffer, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); + wr_buffer->buffer, 1, + offset_blocks, num_blocks, cb, + wr_buffer, &io_opts); + if (ret != 0) { + raid5_free_write_request_buffer(wr_buffer); if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_reading, wr_xor_buff); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - + (raid_bdev->num_base_bdevs - 2); + if (raid_io->base_bdev_io_remaining == 0) { + raid5_free_io_buffer(wr_xor_buff); + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; + return; } raid_io->base_bdev_io_submitted++; + } + } +} + +static void +_raid5_submit_write_request_writing(void *_io_buffer) +{ + struct raid5_io_buffer *io_buffer = _io_buffer; + + raid5_submit_write_request_writing(io_buffer); +} + +static void +raid5_submit_write_request_writing(struct raid5_io_buffer *io_buffer) +{ + struct raid_bdev_io *raid_io = io_buffer->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct 
spdk_bdev_ext_io_opts io_opts = {}; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t stripe_index; + uint64_t parity_strip_idx; + uint64_t req_bdev_idx; + uint32_t offset_in_strip; + uint64_t offset_blocks; + uint64_t num_blocks; + int ret = 0; + uint64_t start_strip_idx; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + + start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + + io_opts.size = sizeof(io_opts); + io_opts.memory_domain = bdev_io->u.bdev.memory_domain; + io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; + io_opts.metadata = bdev_io->u.bdev.md_buf; - // new parity calculation + stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); + parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); + offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - raid5_xor_buffers(xor_res, buffer); - raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); + if (req_bdev_idx >= parity_strip_idx) { + ++req_bdev_idx; + } + offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; + num_blocks = bdev_io->u.bdev.num_blocks; + + switch (raid_io->base_bdev_io_submitted) { + case 0: + // writing request strip - // writing + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_write_request_writing_complete_part, + io_buffer, &io_opts); + if (ret != 0) { if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + 
raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_writing, io_buffer); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); + raid5_free_io_buffer(io_buffer); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; + return; } - raid_io->base_bdev_io_submitted++; + raid_io->base_bdev_io_submitted++; + case 1: + // writing parity strip + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; base_ch = raid_ch->base_channel[parity_strip_idx]; + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - xor_res, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); + io_buffer->buffer, 1, + offset_blocks, num_blocks, raid5_write_request_writing_complete_part, + io_buffer, &io_opts); + if (ret != 0) { if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_request_writing, io_buffer); + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - 2; + if (raid_io->base_bdev_io_remaining == 0) { + raid5_free_io_buffer(io_buffer); + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); + } } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; + return; } + raid_io->base_bdev_io_submitted++; - } else { - if 
(broken_bdev_idx == req_bdev_idx) { - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; - } - raid5_fill_buffer_with_zeroes(xor_res); - - for (uint8_t idx = 0; idx < num_base_bdevs; ++idx) { - base_info = &raid_bdev->base_bdev_info[idx]; - base_ch = raid_ch->base_channel[idx]; - if (idx == parity_strip_idx || idx == req_bdev_idx) { - continue; - } + } +} - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - buffer, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } +static void +raid5_submit_write_request(struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct spdk_bdev_ext_io_opts io_opts = {}; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; + uint8_t broken_bdev_idx = raid_bdev->num_base_bdevs; + uint64_t stripe_index; + uint64_t parity_strip_idx; + uint64_t req_bdev_idx; + uint32_t offset_in_strip; + uint64_t offset_blocks; + uint64_t num_blocks; + int ret = 0; + uint64_t start_strip_idx; + uint64_t end_strip_idx; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } - - raid5_xor_buffers(xor_res, buffer); + start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + if (start_strip_idx != end_strip_idx) { + SPDK_ERRLOG("I/O spans strip 
boundary!\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } - raid_io->base_bdev_io_submitted++; - } + assert(raid_ch != NULL); + assert(raid_ch->base_channel); - raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + io_opts.size = sizeof(io_opts); + io_opts.memory_domain = bdev_io->u.bdev.memory_domain; + io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; + io_opts.metadata = bdev_io->u.bdev.md_buf; - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - xor_res, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } + stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); + parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); + offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); + + req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); + if (req_bdev_idx >= parity_strip_idx) { + ++req_bdev_idx; + } + offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; + num_blocks = bdev_io->u.bdev.num_blocks; + + // calculating of broken strip idx + for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { + if (raid_ch->base_channel[idx] == NULL) { + if (broken_bdev_idx == raid_bdev->num_base_bdevs) { + broken_bdev_idx = idx; } else { - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; - } - raid5_fill_buffer_with_zeroes(xor_res); - - for (uint8_t idx = 0; idx < 
num_base_bdevs; ++idx) { - base_info = &raid_bdev->base_bdev_info[idx]; - base_ch = raid_ch->base_channel[idx]; - if (idx == parity_strip_idx || idx == req_bdev_idx) { - continue; - } + SPDK_ERRLOG("2 broken strips\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + } + } - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - buffer, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } + if (broken_bdev_idx == parity_strip_idx) { + // case: broken parity strip - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } - - raid5_xor_buffers(xor_res, buffer); + base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; + base_ch = raid_ch->base_channel[req_bdev_idx]; - raid_io->base_bdev_io_submitted++; - } + ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + offset_blocks, num_blocks, raid5_bdev_io_completion, + raid_io, &io_opts); + + if (ret == -ENOMEM) { + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + } else if (ret != 0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } else { + // cases with parity recalculating - raid5_xor_buffers(xor_res, &bdev_io->u.bdev.iovs[0]); + struct raid5_io_buffer *io_buffer; - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, 
num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } + base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; + base_ch = raid_ch->base_channel[parity_strip_idx]; - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - xor_res, 1, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - SPDK_ERRLOG("ENOMEM on write request in RAID5\n"); - assert(false); - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return ret; - } - } + io_buffer = raid5_get_io_buffer(raid_io, num_blocks * block_size_b); + if (io_buffer == NULL) { + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_rw_request); + return; } - raid5_free_buffer(buffer); - raid5_free_buffer(xor_res); - return 0; + raid5_submit_write_request_reading(io_buffer); } } @@ -603,22 +880,16 @@ static void raid5_submit_rw_request(struct raid_bdev_io *raid_io) { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - int ret = 0; switch (bdev_io->type) { case SPDK_BDEV_IO_TYPE_READ: - ret = raid5_submit_read_request(raid_io); + raid5_submit_read_request(raid_io); break; case SPDK_BDEV_IO_TYPE_WRITE: - ret = raid5_submit_write_request(raid_io); + raid5_submit_write_request(raid_io); break; default: - ret = -EINVAL; - break; - } - 
- if (ret != 0) { - SPDK_ERRLOG("bdev io submit error, it should not happen\n"); + SPDK_ERRLOG("Invalid request type"); raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); assert(false); } From f3c00cfe0f6bb644f83553d8a7088cce76871071 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 01:40:52 +0000 Subject: [PATCH 03/71] test/bdev/raid5: Add script to run RAID5 tests. --- test/bdev/raid5/run_tests.sh | 147 +++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 test/bdev/raid5/run_tests.sh diff --git a/test/bdev/raid5/run_tests.sh b/test/bdev/raid5/run_tests.sh new file mode 100644 index 00000000000..14e7d4d6397 --- /dev/null +++ b/test/bdev/raid5/run_tests.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +# before running: +# load the module ublk_drv.ko into the kernel: insmod {...}/ublk_drv.ko +# activate root mode: sudo -i +# activate virtualenv: source {virtualenv dir}/bin/activate +# run the configure script from the spdk dir: ./configure --with-ublk +# type 'make' to build spdk +# run this script: bash {path to script}/run_tests.sh {full path to spdk} + +# $1 full path to spdk + +# exit codes: +# 1 - path to spdk isn't entered +# 2 - spdk_tgt doesn't start + +spdk=$1 + +function start() { + local old_dir=$(pwd); + cd $spdk; + + ./scripts/setup.sh >/dev/null& + pid=$!; + wait "$pid"; + + start-stop-daemon -Sbv -n spdk_tgt -x $spdk/build/bin/spdk_tgt; + sleep 5; + ./scripts/rpc.py ublk_create_target; + + cd $old_dir; +} + +function finish() { + local old_dir=$(pwd); + cd $spdk; + + ./scripts/rpc.py ublk_destroy_target; + start-stop-daemon -Kvx $spdk/build/bin/spdk_tgt; + cd $old_dir; +} + +# $1 full path to start json config +# $2 full path to crash base bdev json config +# $3 full path to stop json config +# $4 full path to fio config +function run_test_with_fio() { + local old_dir=$(pwd); + + cd $spdk; + ./scripts/rpc.py load_config -j $1; + if [[ "$?"
!= "0" ]]; then + cd $old_dir; + return 1; + fi + sleep 1; + ./scripts/rpc.py ublk_start_disk Raid5 1 >/dev/null; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 1; + fi + sleep 1; + + fio $4 >/dev/null; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 2 + fi + + ./scripts/rpc.py load_config -j $2; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 1; + fi + sleep 1; + + fio $4 >/dev/null; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 2 + fi + + ./scripts/rpc.py ublk_stop_disk 1; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 1; + fi + sleep 1; + ./scripts/rpc.py load_config -j $3; + if [[ "$?" != "0" ]]; then + cd $old_dir; + return 1; + fi + sleep 1; + + cd $old_dir; + return 0; +} + +function run_tests() { + local dir_sample=$(pwd)/tests/test; + local state=0; + local res; + echo -e "\033[34mtest with using fio results\033[0m"; + for (( i=0; i <= 2; i++ )) do + cat $dir_sample$i/info.txt; + echo; + if [[ "$state" != "0" ]]; then + echo -e "result: \033[33mskipped\033[0m"; + continue; + fi + run_test_with_fio $dir_sample$i/start.json $dir_sample$i/crash_base_bdev.json $dir_sample$i/stop.json $dir_sample$i/write.fio; + res=$?; + if [[ "$res" == 0 ]]; then + echo -e "write result: \033[32mpassed\033[0m"; + else + echo -e "write result: \033[31mfailed\033[0m"; + state=1 + fi + + if [[ "$state" != "0" ]]; then + echo -e "randwrite result: \033[33mskipped\033[0m"; + continue; + fi + run_test_with_fio $dir_sample$i/start.json $dir_sample$i/crash_base_bdev.json $dir_sample$i/stop.json $dir_sample$i/randwrite.fio; + res=$?; + if [[ "$res" == 0 ]]; then + echo -e "randwrite result: \033[32mpassed\033[0m"; + else + echo -e "randwrite result: \033[31mfailed\033[0m"; + state=1 + fi + done +} + +if [[ -z "$spdk" ]]; then + echo "error: path to spdk isn't entered" + exit 1 +fi + +start; +echo; + +run_tests; + +echo; +finish; \ No newline at end of file From a62ab2071686b4e8c476089cfd23e9635b2496c0 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: 
Wed, 20 Sep 2023 01:43:23 +0000 Subject: [PATCH 04/71] test/bdev/raid5: Add test configs and info files. --- .../raid5/tests/test0/crash_base_bdev.json | 15 +++ test/bdev/raid5/tests/test0/info.txt | 4 + test/bdev/raid5/tests/test0/randwrite.fio | 12 ++ test/bdev/raid5/tests/test0/start.json | 46 +++++++ test/bdev/raid5/tests/test0/stop.json | 27 ++++ test/bdev/raid5/tests/test0/write.fio | 12 ++ .../raid5/tests/test1/crash_base_bdev.json | 15 +++ test/bdev/raid5/tests/test1/info.txt | 4 + test/bdev/raid5/tests/test1/randwrite.fio | 12 ++ test/bdev/raid5/tests/test1/start.json | 55 ++++++++ test/bdev/raid5/tests/test1/stop.json | 33 +++++ test/bdev/raid5/tests/test1/write.fio | 12 ++ .../raid5/tests/test2/crash_base_bdev.json | 15 +++ test/bdev/raid5/tests/test2/info.txt | 4 + test/bdev/raid5/tests/test2/randwrite.fio | 12 ++ test/bdev/raid5/tests/test2/start.json | 118 ++++++++++++++++++ test/bdev/raid5/tests/test2/stop.json | 75 +++++++++++ test/bdev/raid5/tests/test2/write.fio | 12 ++ 18 files changed, 483 insertions(+) create mode 100644 test/bdev/raid5/tests/test0/crash_base_bdev.json create mode 100644 test/bdev/raid5/tests/test0/info.txt create mode 100644 test/bdev/raid5/tests/test0/randwrite.fio create mode 100644 test/bdev/raid5/tests/test0/start.json create mode 100644 test/bdev/raid5/tests/test0/stop.json create mode 100644 test/bdev/raid5/tests/test0/write.fio create mode 100644 test/bdev/raid5/tests/test1/crash_base_bdev.json create mode 100644 test/bdev/raid5/tests/test1/info.txt create mode 100644 test/bdev/raid5/tests/test1/randwrite.fio create mode 100644 test/bdev/raid5/tests/test1/start.json create mode 100644 test/bdev/raid5/tests/test1/stop.json create mode 100644 test/bdev/raid5/tests/test1/write.fio create mode 100644 test/bdev/raid5/tests/test2/crash_base_bdev.json create mode 100644 test/bdev/raid5/tests/test2/info.txt create mode 100644 test/bdev/raid5/tests/test2/randwrite.fio create mode 100644 test/bdev/raid5/tests/test2/start.json 
create mode 100644 test/bdev/raid5/tests/test2/stop.json create mode 100644 test/bdev/raid5/tests/test2/write.fio diff --git a/test/bdev/raid5/tests/test0/crash_base_bdev.json b/test/bdev/raid5/tests/test0/crash_base_bdev.json new file mode 100644 index 00000000000..9c23e8a2142 --- /dev/null +++ b/test/bdev/raid5/tests/test0/crash_base_bdev.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc2" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/info.txt b/test/bdev/raid5/tests/test0/info.txt new file mode 100644 index 00000000000..c458a484c32 --- /dev/null +++ b/test/bdev/raid5/tests/test0/info.txt @@ -0,0 +1,4 @@ +1: + num base bdevs: 3 + strip size: 4KB + total size: 128KB \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/randwrite.fio b/test/bdev/raid5/tests/test0/randwrite.fio new file mode 100644 index 00000000000..baba5e3ea0f --- /dev/null +++ b/test/bdev/raid5/tests/test0/randwrite.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=128K +iodepth=1 +rw=randwrite +bs=2K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/start.json b/test/bdev/raid5/tests/test0/start.json new file mode 100644 index 00000000000..792480ca6ab --- /dev/null +++ b/test/bdev/raid5/tests/test0/start.json @@ -0,0 +1,46 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc0" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc1" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc2" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 4, + "base_bdevs": [ 
+ "Malloc0", + "Malloc1", + "Malloc2" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/stop.json b/test/bdev/raid5/tests/test0/stop.json new file mode 100644 index 00000000000..d2b24c6d5eb --- /dev/null +++ b/test/bdev/raid5/tests/test0/stop.json @@ -0,0 +1,27 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" + }, + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/write.fio b/test/bdev/raid5/tests/test0/write.fio new file mode 100644 index 00000000000..60b4d4bfcca --- /dev/null +++ b/test/bdev/raid5/tests/test0/write.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=128K +iodepth=1 +rw=write +bs=8K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/crash_base_bdev.json b/test/bdev/raid5/tests/test1/crash_base_bdev.json new file mode 100644 index 00000000000..4168fa86506 --- /dev/null +++ b/test/bdev/raid5/tests/test1/crash_base_bdev.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/info.txt b/test/bdev/raid5/tests/test1/info.txt new file mode 100644 index 00000000000..1f1591340b1 --- /dev/null +++ b/test/bdev/raid5/tests/test1/info.txt @@ -0,0 +1,4 @@ +1: + num base bdevs: 4 + strip size: 8KB + total size: 192KB \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/randwrite.fio b/test/bdev/raid5/tests/test1/randwrite.fio new file mode 100644 index 00000000000..e4482325b9d --- /dev/null +++ 
b/test/bdev/raid5/tests/test1/randwrite.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=192K +iodepth=1 +rw=randwrite +bs=2K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/start.json b/test/bdev/raid5/tests/test1/start.json new file mode 100644 index 00000000000..c8507dae19c --- /dev/null +++ b/test/bdev/raid5/tests/test1/start.json @@ -0,0 +1,55 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc0" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc1" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc2" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc3" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 8, + "base_bdevs": [ + "Malloc0", + "Malloc1", + "Malloc2", + "Malloc3" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/stop.json b/test/bdev/raid5/tests/test1/stop.json new file mode 100644 index 00000000000..1dcc9c8cf03 --- /dev/null +++ b/test/bdev/raid5/tests/test1/stop.json @@ -0,0 +1,33 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" + }, + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc2" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc3" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/write.fio b/test/bdev/raid5/tests/test1/write.fio new file mode 100644 index 
00000000000..9e792b9088c --- /dev/null +++ b/test/bdev/raid5/tests/test1/write.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=192K +iodepth=1 +rw=write +bs=8K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/crash_base_bdev.json b/test/bdev/raid5/tests/test2/crash_base_bdev.json new file mode 100644 index 00000000000..4168fa86506 --- /dev/null +++ b/test/bdev/raid5/tests/test2/crash_base_bdev.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/info.txt b/test/bdev/raid5/tests/test2/info.txt new file mode 100644 index 00000000000..defcbe5b4a4 --- /dev/null +++ b/test/bdev/raid5/tests/test2/info.txt @@ -0,0 +1,4 @@ +2: + num base bdevs: 11 + strip size: 16KB + total size: 1280KB \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/randwrite.fio b/test/bdev/raid5/tests/test2/randwrite.fio new file mode 100644 index 00000000000..f39211b750e --- /dev/null +++ b/test/bdev/raid5/tests/test2/randwrite.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=1M +iodepth=1 +rw=randwrite +bs=16K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/start.json b/test/bdev/raid5/tests/test2/start.json new file mode 100644 index 00000000000..5a31a37437e --- /dev/null +++ b/test/bdev/raid5/tests/test2/start.json @@ -0,0 +1,118 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc0" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc1" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": 
"Malloc2" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc3" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc4" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc5" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc6" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc7" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc8" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc9" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc10" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 16, + "base_bdevs": [ + "Malloc0", + "Malloc1", + "Malloc2", + "Malloc3", + "Malloc4", + "Malloc5", + "Malloc6", + "Malloc7", + "Malloc8", + "Malloc9", + "Malloc10" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/stop.json b/test/bdev/raid5/tests/test2/stop.json new file mode 100644 index 00000000000..4f179f1a542 --- /dev/null +++ b/test/bdev/raid5/tests/test2/stop.json @@ -0,0 +1,75 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" + }, + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc2" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc3" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": 
"Malloc4" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc5" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc6" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc7" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc8" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc9" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc10" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/write.fio b/test/bdev/raid5/tests/test2/write.fio new file mode 100644 index 00000000000..915837a5b5b --- /dev/null +++ b/test/bdev/raid5/tests/test2/write.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=md5 +size=1M +iodepth=1 +rw=write +bs=16K + +[test] +filename=/dev/ublkb1 +numjobs=1 \ No newline at end of file From 35e0baa2a630ab4d228eecc6087d98bb3de7278f Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 08:45:10 +0000 Subject: [PATCH 05/71] test/bdev/raid5: Add check for ublk target start. --- test/bdev/raid5/run_tests.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/bdev/raid5/run_tests.sh b/test/bdev/raid5/run_tests.sh index 14e7d4d6397..1fa7adf37b1 100644 --- a/test/bdev/raid5/run_tests.sh +++ b/test/bdev/raid5/run_tests.sh @@ -12,7 +12,7 @@ # exit codes: # 1 - path to spdk isn't entered -# 2 - spdk_tgt does't start +# 2 - ublk target isn't created spdk=$1 @@ -27,6 +27,11 @@ function start() { start-stop-daemon -Sbv -n spdk_tgt -x $spdk/build/bin/spdk_tgt; sleep 5; ./scripts/rpc.py ublk_create_target; + if [[ "$?" 
!= "0" ]]; then + start-stop-daemon -Kvx $spdk/build/bin/spdk_tgt; + cd $old_dir; + exit 2; + fi cd $old_dir; } From 92974f5b32d43a6b92826ea84aa4c77049e50c23 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 08:47:59 +0000 Subject: [PATCH 06/71] test/bdev/raid5: Fix dir sample for test data. --- test/bdev/raid5/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/bdev/raid5/run_tests.sh b/test/bdev/raid5/run_tests.sh index 1fa7adf37b1..7c768976c55 100644 --- a/test/bdev/raid5/run_tests.sh +++ b/test/bdev/raid5/run_tests.sh @@ -103,7 +103,7 @@ function run_test_with_fio() { } function run_tests() { - local dir_sample=$(pwd)/tests/test; + local dir_sample=$spdk/test/bdev/raid5/tests/test; local state=0; local res; echo -e "\033[34mtest with using fio results\033[0m"; From e9e061519a532abad7b90feaaebf13cba7adae82 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 08:49:24 +0000 Subject: [PATCH 07/71] test/bdev/raid5: Add line at the end of the file. --- test/bdev/raid5/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/bdev/raid5/run_tests.sh b/test/bdev/raid5/run_tests.sh index 7c768976c55..8c6796b25fa 100644 --- a/test/bdev/raid5/run_tests.sh +++ b/test/bdev/raid5/run_tests.sh @@ -149,4 +149,4 @@ echo; run_tests; echo; -finish; \ No newline at end of file +finish; From 797f0a59096034bb96dc64bb5586b393f9c80fb5 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 08:59:38 +0000 Subject: [PATCH 08/71] test/bdev/raid5: Fix test case number. 
--- test/bdev/raid5/tests/test0/info.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/bdev/raid5/tests/test0/info.txt b/test/bdev/raid5/tests/test0/info.txt index c458a484c32..6b6e43b5080 100644 --- a/test/bdev/raid5/tests/test0/info.txt +++ b/test/bdev/raid5/tests/test0/info.txt @@ -1,4 +1,4 @@ -1: +0: num base bdevs: 3 strip size: 4KB total size: 128KB \ No newline at end of file From 98658dda5f12bf795513b90555cc707349e4b8db Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 09:02:51 +0000 Subject: [PATCH 09/71] test/bdev/raid5: Fix tabs num into json configs. --- .../raid5/tests/test0/crash_base_bdev.json | 28 +-- test/bdev/raid5/tests/test0/start.json | 84 +++---- test/bdev/raid5/tests/test0/stop.json | 48 ++-- .../raid5/tests/test1/crash_base_bdev.json | 28 +-- test/bdev/raid5/tests/test1/start.json | 100 ++++----- test/bdev/raid5/tests/test1/stop.json | 58 ++--- .../raid5/tests/test2/crash_base_bdev.json | 28 +-- test/bdev/raid5/tests/test2/start.json | 212 +++++++++--------- test/bdev/raid5/tests/test2/stop.json | 128 +++++------ 9 files changed, 357 insertions(+), 357 deletions(-) diff --git a/test/bdev/raid5/tests/test0/crash_base_bdev.json b/test/bdev/raid5/tests/test0/crash_base_bdev.json index 9c23e8a2142..daeda3209ea 100644 --- a/test/bdev/raid5/tests/test0/crash_base_bdev.json +++ b/test/bdev/raid5/tests/test0/crash_base_bdev.json @@ -1,15 +1,15 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Malloc2" - }, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc2" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/start.json b/test/bdev/raid5/tests/test0/start.json index 792480ca6ab..35de9582461 100644 --- a/test/bdev/raid5/tests/test0/start.json +++ 
b/test/bdev/raid5/tests/test0/start.json @@ -1,46 +1,46 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc0" - }, - "method": "bdev_malloc_create" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc0" }, - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc1" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc1" }, - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc2" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc2" }, - { - "params": { - "name": "Raid5", - "raid_level": "5", - "strip_size_kb": 4, - "base_bdevs": [ - "Malloc0", - "Malloc1", - "Malloc2" - ] - }, - "method": "bdev_raid_create" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 4, + "base_bdevs": [ + "Malloc0", + "Malloc1", + "Malloc2" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test0/stop.json b/test/bdev/raid5/tests/test0/stop.json index d2b24c6d5eb..d1a106fb0f2 100644 --- a/test/bdev/raid5/tests/test0/stop.json +++ b/test/bdev/raid5/tests/test0/stop.json @@ -1,27 +1,27 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Raid5" - }, - "method": "bdev_raid_delete" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" }, - { - "params": { - "name": "Malloc0" - }, - "method": "bdev_malloc_delete" + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" }, - { - "params": { - "name": "Malloc1" 
- }, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/crash_base_bdev.json b/test/bdev/raid5/tests/test1/crash_base_bdev.json index 4168fa86506..fa55894869c 100644 --- a/test/bdev/raid5/tests/test1/crash_base_bdev.json +++ b/test/bdev/raid5/tests/test1/crash_base_bdev.json @@ -1,15 +1,15 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Malloc1" - }, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/start.json b/test/bdev/raid5/tests/test1/start.json index c8507dae19c..31482560e0f 100644 --- a/test/bdev/raid5/tests/test1/start.json +++ b/test/bdev/raid5/tests/test1/start.json @@ -1,55 +1,55 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc0" - }, - "method": "bdev_malloc_create" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc0" }, - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc1" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc1" }, - { - "params": { - "block_size": 4096, - "num_blocks": 16, - "name": "Malloc2" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc2" }, - { - "params": { - "block_size": 4096, - "num_blocks": 16, - 
"name": "Malloc3" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "Malloc3" }, - { - "params": { - "name": "Raid5", - "raid_level": "5", - "strip_size_kb": 8, - "base_bdevs": [ - "Malloc0", - "Malloc1", - "Malloc2", - "Malloc3" - ] - }, - "method": "bdev_raid_create" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 8, + "base_bdevs": [ + "Malloc0", + "Malloc1", + "Malloc2", + "Malloc3" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test1/stop.json b/test/bdev/raid5/tests/test1/stop.json index 1dcc9c8cf03..25a715449fa 100644 --- a/test/bdev/raid5/tests/test1/stop.json +++ b/test/bdev/raid5/tests/test1/stop.json @@ -1,33 +1,33 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Raid5" - }, - "method": "bdev_raid_delete" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" }, - { - "params": { - "name": "Malloc0" - }, - "method": "bdev_malloc_delete" + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" }, - { - "params": { - "name": "Malloc2" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc2" }, - { - "params": { - "name": "Malloc3" - }, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc3" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/crash_base_bdev.json b/test/bdev/raid5/tests/test2/crash_base_bdev.json index 4168fa86506..fa55894869c 100644 --- a/test/bdev/raid5/tests/test2/crash_base_bdev.json +++ 
b/test/bdev/raid5/tests/test2/crash_base_bdev.json @@ -1,15 +1,15 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Malloc1" - }, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Malloc1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/start.json b/test/bdev/raid5/tests/test2/start.json index 5a31a37437e..b97a9be1b48 100644 --- a/test/bdev/raid5/tests/test2/start.json +++ b/test/bdev/raid5/tests/test2/start.json @@ -1,118 +1,118 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc0" - }, - "method": "bdev_malloc_create" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc0" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc1" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc1" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc2" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc2" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc3" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc3" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc4" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc4" }, - { - "params": { - "block_size": 4096, - 
"num_blocks": 32, - "name": "Malloc5" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc5" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc6" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc6" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc7" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc7" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc8" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc8" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc9" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc9" }, - { - "params": { - "block_size": 4096, - "num_blocks": 32, - "name": "Malloc10" - }, - "method": "bdev_malloc_create" + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32, + "name": "Malloc10" }, - { - "params": { - "name": "Raid5", - "raid_level": "5", - "strip_size_kb": 16, - "base_bdevs": [ - "Malloc0", - "Malloc1", - "Malloc2", - "Malloc3", - "Malloc4", - "Malloc5", - "Malloc6", - "Malloc7", - "Malloc8", - "Malloc9", - "Malloc10" - ] - }, - "method": "bdev_raid_create" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_create" + }, + { + "params": { + "name": "Raid5", + "raid_level": "5", + "strip_size_kb": 16, + "base_bdevs": [ + "Malloc0", + "Malloc1", + "Malloc2", + "Malloc3", + "Malloc4", + "Malloc5", + "Malloc6", + "Malloc7", + "Malloc8", + "Malloc9", + 
"Malloc10" + ] + }, + "method": "bdev_raid_create" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/bdev/raid5/tests/test2/stop.json b/test/bdev/raid5/tests/test2/stop.json index 4f179f1a542..367926c5d43 100644 --- a/test/bdev/raid5/tests/test2/stop.json +++ b/test/bdev/raid5/tests/test2/stop.json @@ -1,75 +1,75 @@ { - "subsystems": [ - { - "subsystem": "bdev", - "config": [ - { - "params": { - "name": "Raid5" - }, - "method": "bdev_raid_delete" + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "name": "Raid5" }, - { - "params": { - "name": "Malloc0" - }, - "method": "bdev_malloc_delete" + "method": "bdev_raid_delete" + }, + { + "params": { + "name": "Malloc0" }, - { - "params": { - "name": "Malloc2" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc2" }, - { - "params": { - "name": "Malloc3" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc3" }, - { - "params": { - "name": "Malloc4" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc4" }, - { - "params": { - "name": "Malloc5" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc5" }, - { - "params": { - "name": "Malloc6" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc6" }, - { - "params": { - "name": "Malloc7" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc7" }, - { - "params": { - "name": "Malloc8" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc8" }, - { - "params": { - "name": "Malloc9" - }, - "method": "bdev_malloc_delete" + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc9" }, - { - "params": { - "name": "Malloc10" - 
}, - "method": "bdev_malloc_delete" - } - ] - } - ] - } \ No newline at end of file + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "Malloc10" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] +} \ No newline at end of file From 710395cba1add19ed93ae3882401ff002c517d81 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Wed, 20 Sep 2023 09:04:46 +0000 Subject: [PATCH 10/71] test/bdev/raid5: Set do_verify flags explicitly. --- test/bdev/raid5/tests/test0/randwrite.fio | 1 + test/bdev/raid5/tests/test0/write.fio | 1 + test/bdev/raid5/tests/test1/randwrite.fio | 1 + test/bdev/raid5/tests/test1/write.fio | 1 + test/bdev/raid5/tests/test2/randwrite.fio | 1 + test/bdev/raid5/tests/test2/write.fio | 1 + 6 files changed, 6 insertions(+) diff --git a/test/bdev/raid5/tests/test0/randwrite.fio b/test/bdev/raid5/tests/test0/randwrite.fio index baba5e3ea0f..6f60b30dfde 100644 --- a/test/bdev/raid5/tests/test0/randwrite.fio +++ b/test/bdev/raid5/tests/test0/randwrite.fio @@ -1,6 +1,7 @@ [global] thread=1 group_reporting=1 +do_verify=1 verify=md5 size=128K iodepth=1 diff --git a/test/bdev/raid5/tests/test0/write.fio b/test/bdev/raid5/tests/test0/write.fio index 60b4d4bfcca..9c5246cd8cc 100644 --- a/test/bdev/raid5/tests/test0/write.fio +++ b/test/bdev/raid5/tests/test0/write.fio @@ -1,6 +1,7 @@ [global] thread=1 group_reporting=1 +do_verify=1 verify=md5 size=128K iodepth=1 diff --git a/test/bdev/raid5/tests/test1/randwrite.fio b/test/bdev/raid5/tests/test1/randwrite.fio index e4482325b9d..70350791f27 100644 --- a/test/bdev/raid5/tests/test1/randwrite.fio +++ b/test/bdev/raid5/tests/test1/randwrite.fio @@ -1,6 +1,7 @@ [global] thread=1 group_reporting=1 +do_verify=1 verify=md5 size=192K iodepth=1 diff --git a/test/bdev/raid5/tests/test1/write.fio b/test/bdev/raid5/tests/test1/write.fio index 9e792b9088c..703192dba18 100644 --- a/test/bdev/raid5/tests/test1/write.fio +++ b/test/bdev/raid5/tests/test1/write.fio @@ -1,6 +1,7 @@ [global] thread=1 
group_reporting=1 +do_verify=1 verify=md5 size=192K iodepth=1 diff --git a/test/bdev/raid5/tests/test2/randwrite.fio b/test/bdev/raid5/tests/test2/randwrite.fio index f39211b750e..eb8c22f44c1 100644 --- a/test/bdev/raid5/tests/test2/randwrite.fio +++ b/test/bdev/raid5/tests/test2/randwrite.fio @@ -1,6 +1,7 @@ [global] thread=1 group_reporting=1 +do_verify=1 verify=md5 size=1M iodepth=1 diff --git a/test/bdev/raid5/tests/test2/write.fio b/test/bdev/raid5/tests/test2/write.fio index 915837a5b5b..bb11a2704b6 100644 --- a/test/bdev/raid5/tests/test2/write.fio +++ b/test/bdev/raid5/tests/test2/write.fio @@ -1,6 +1,7 @@ [global] thread=1 group_reporting=1 +do_verify=1 verify=md5 size=1M iodepth=1 From 43db5d39014a98859cbc9e435e1357e2f45e4c1d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 9 Nov 2023 01:18:02 +0000 Subject: [PATCH 11/71] feat: Implement write zeroes req on start RAID5. --- module/bdev/raid/raid5.c | 180 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index a7e1017b3b8..f2385e845f1 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -13,6 +13,17 @@ #include "lib/thread/thread_internal.h" #include "spdk/log.h" +#include "spdk/likely.h" + +enum raid5_rw_type { + UNDEFINED = 0, + READ_MODIFY_WRITE = 1, + DEFAULT = 2 +}; + +struct raid5_info { + enum raid5_rw_type rw_type; +}; struct raid5_io_buffer { struct raid_bdev_io *raid_io; @@ -895,6 +906,167 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) } } +static bool +raid5_wz_req_complete_part_final(struct raid_bdev_io *raid_io, uint64_t completed, + enum spdk_bdev_io_status status) +{ + assert(raid_io->base_bdev_io_remaining >= completed); + raid_io->base_bdev_io_remaining -= completed; + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid_io->base_bdev_io_status = status; + } + + if (raid_io->base_bdev_io_remaining == 0) { + struct raid5_info *r0_info = 
raid_io->raid_bdev->module_private; + + if (raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { + r0_info->rw_type = READ_MODIFY_WRITE; + SPDK_NOTICELOG("raid5 rw_type: READ_MODIFY_WRITE\n"); + } else { + r0_info->rw_type = DEFAULT; + SPDK_NOTICELOG("raid5 rw_type: DEFAULT\n"); + } + + raid_bdev_destroy_cb(raid_io->raid_bdev, raid_io->raid_ch); + free(raid_io->raid_ch); + free(raid_io); + return true; + } else { + return false; + } +} + +static void +raid5_wz_req_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + struct raid_bdev_io *raid_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + raid5_wz_req_complete_part_final(raid_io, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + +static int +raid5_submit_write_zeroes_request(struct raid_bdev_io *raid_io); + +static void +_raid5_submit_write_zeroes_request(void *_raid_io) +{ + struct raid_bdev_io *raid_io = _raid_io; + + raid5_submit_write_zeroes_request(raid_io); +} + +static int +raid5_submit_write_zeroes_request(struct raid_bdev_io *raid_io) { + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t num_blocks = raid_bdev->bdev.blockcnt / (raid_bdev->num_base_bdevs - 1); + uint64_t base_bdev_io_not_submitted; + int ret = 0; + + if (raid_io->base_bdev_io_submitted == 0) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; + } + + for (uint8_t idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; ++idx) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_io->raid_ch->base_channel[idx]; + + if (base_ch == NULL) { + raid_io->base_bdev_io_submitted++; + raid5_wz_req_complete_part_final(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); + continue; + } + + ret = spdk_bdev_write_zeroes_blocks(base_info->desc, base_ch, + 0, num_blocks, + raid5_wz_req_complete_part, raid_io); + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == 
-ENOMEM)) { + raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_submit_write_zeroes_request); + return 0; + } + + base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - + raid_io->base_bdev_io_submitted; + raid5_wz_req_complete_part_final(raid_io, + base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return 0; + } + + raid_io->base_bdev_io_submitted++; + } + + if (raid_io->base_bdev_io_submitted == 0) { + ret = -ENODEV; + } + return ret; +} + +static void +raid5_set_rw_type(struct raid_bdev *raid_bdev) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *base_bdev; + struct raid_bdev_io *raid_io; + struct raid5_info *r5_info = raid_bdev->module_private; + int ret; + + r5_info->rw_type = UNDEFINED; + + for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { + desc = raid_bdev->base_bdev_info[idx].desc; + if (desc != NULL) { + base_bdev = spdk_bdev_desc_get_bdev(desc); + if (!base_bdev->fn_table->io_type_supported(base_bdev->ctxt, + SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { + r5_info->rw_type = DEFAULT; + return; + } + } + } + + raid_io = calloc(1, sizeof(struct raid_bdev_io)); + if (raid_io == NULL) { + r5_info->rw_type = DEFAULT; + return; + } + + raid_io->raid_bdev = raid_bdev; + raid_io->base_bdev_io_remaining = 0; + raid_io->base_bdev_io_submitted = 0; + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; + raid_io->raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel)); + if (raid_io->raid_ch == NULL) { + free(raid_io); + r5_info->rw_type = DEFAULT; + return; + } + + ret = raid_bdev_create_cb(raid_bdev, raid_io->raid_ch); + if (ret != 0) { + free(raid_io->raid_ch); + free(raid_io); + r5_info->rw_type = DEFAULT; + return; + } + + ret = raid5_submit_write_zeroes_request(raid_io); + if (spdk_unlikely(ret != 0)) { + raid_bdev_destroy_cb(raid_bdev, raid_io->raid_ch); + free(raid_io->raid_ch); + free(raid_io); + r5_info->rw_type = DEFAULT; + return; + } +} + static uint64_t 
raid5_calculate_blockcnt(struct raid_bdev *raid_bdev) { @@ -919,11 +1091,19 @@ raid5_calculate_blockcnt(struct raid_bdev *raid_bdev) static int raid5_start(struct raid_bdev *raid_bdev) { + struct raid5_info *r5_info; + raid_bdev->bdev.blockcnt = raid5_calculate_blockcnt(raid_bdev); raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; raid_bdev->bdev.split_on_optimal_io_boundary = true; raid_bdev->min_base_bdevs_operational = raid_bdev->num_base_bdevs - 1; + r5_info = calloc(1, (sizeof(struct raid5_info))); + assert(r5_info != NULL); + raid_bdev->module_private = r5_info; + + raid5_set_rw_type(raid_bdev); + return 0; } From db4cf9a476935675925111686627802260560439 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 9 Nov 2023 01:19:44 +0000 Subject: [PATCH 12/71] fix: Add static to definition raid5_queue_io_wait. --- module/bdev/raid/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index f2385e845f1..eafd46523d0 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -185,7 +185,7 @@ raid5_fill_iovs_with_zeroes(struct iovec *iovs, int iovcnt) } } -void +static void raid5_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg) { From 746b681259649557c2b076a33c49e640f963f895 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 9 Nov 2023 02:43:07 +0000 Subject: [PATCH 13/71] feat: Add request splitting on stripe. 
--- module/bdev/raid/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index eafd46523d0..56605320630 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1092,9 +1092,10 @@ static int raid5_start(struct raid_bdev *raid_bdev) { struct raid5_info *r5_info; + uint32_t logic_stripe_size = raid_bdev->strip_size * (raid_bdev->num_base_bdevs - 1); raid_bdev->bdev.blockcnt = raid5_calculate_blockcnt(raid_bdev); - raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; + raid_bdev->bdev.optimal_io_boundary = logic_stripe_size; raid_bdev->bdev.split_on_optimal_io_boundary = true; raid_bdev->min_base_bdevs_operational = raid_bdev->num_base_bdevs - 1; From 96ca2a268e1f1aa5d13fa3fc5ee6aeb70f140805 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 11:32:25 +0000 Subject: [PATCH 14/71] refactor: Change rw_type name to write_type. --- module/bdev/raid/raid5.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 56605320630..f99b68a1999 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -15,14 +15,14 @@ #include "spdk/log.h" #include "spdk/likely.h" -enum raid5_rw_type { +enum raid5_write_type { UNDEFINED = 0, READ_MODIFY_WRITE = 1, DEFAULT = 2 }; struct raid5_info { - enum raid5_rw_type rw_type; + enum raid5_write_type write_type; }; struct raid5_io_buffer { @@ -918,14 +918,14 @@ raid5_wz_req_complete_part_final(struct raid_bdev_io *raid_io, uint64_t complete } if (raid_io->base_bdev_io_remaining == 0) { - struct raid5_info *r0_info = raid_io->raid_bdev->module_private; + struct raid5_info *r5_info = raid_io->raid_bdev->module_private; if (raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { - r0_info->rw_type = READ_MODIFY_WRITE; - SPDK_NOTICELOG("raid5 rw_type: READ_MODIFY_WRITE\n"); + r5_info->write_type = 
READ_MODIFY_WRITE; + SPDK_NOTICELOG("raid5 write_type: READ_MODIFY_WRITE\n"); } else { - r0_info->rw_type = DEFAULT; - SPDK_NOTICELOG("raid5 rw_type: DEFAULT\n"); + r5_info->write_type = DEFAULT; + SPDK_NOTICELOG("raid5 write_type: DEFAULT\n"); } raid_bdev_destroy_cb(raid_io->raid_bdev, raid_io->raid_ch); @@ -1010,7 +1010,7 @@ raid5_submit_write_zeroes_request(struct raid_bdev_io *raid_io) { } static void -raid5_set_rw_type(struct raid_bdev *raid_bdev) +raid5_set_write_type(struct raid_bdev *raid_bdev) { struct spdk_bdev_desc *desc; struct spdk_bdev *base_bdev; @@ -1018,7 +1018,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) struct raid5_info *r5_info = raid_bdev->module_private; int ret; - r5_info->rw_type = UNDEFINED; + r5_info->write_type = UNDEFINED; for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { desc = raid_bdev->base_bdev_info[idx].desc; @@ -1026,7 +1026,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) base_bdev = spdk_bdev_desc_get_bdev(desc); if (!base_bdev->fn_table->io_type_supported(base_bdev->ctxt, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { - r5_info->rw_type = DEFAULT; + r5_info->write_type = DEFAULT; return; } } @@ -1034,7 +1034,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) raid_io = calloc(1, sizeof(struct raid_bdev_io)); if (raid_io == NULL) { - r5_info->rw_type = DEFAULT; + r5_info->write_type = DEFAULT; return; } @@ -1045,7 +1045,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) raid_io->raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel)); if (raid_io->raid_ch == NULL) { free(raid_io); - r5_info->rw_type = DEFAULT; + r5_info->write_type = DEFAULT; return; } @@ -1053,7 +1053,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) if (ret != 0) { free(raid_io->raid_ch); free(raid_io); - r5_info->rw_type = DEFAULT; + r5_info->write_type = DEFAULT; return; } @@ -1062,7 +1062,7 @@ raid5_set_rw_type(struct raid_bdev *raid_bdev) raid_bdev_destroy_cb(raid_bdev, raid_io->raid_ch); free(raid_io->raid_ch); free(raid_io); - 
r5_info->rw_type = DEFAULT; + r5_info->write_type = DEFAULT; return; } } @@ -1103,7 +1103,7 @@ raid5_start(struct raid_bdev *raid_bdev) assert(r5_info != NULL); raid_bdev->module_private = r5_info; - raid5_set_rw_type(raid_bdev); + raid5_set_write_type(raid_bdev); return 0; } From bfa35501ced5cf9c15c0c282133447c99e62ac13 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:01:22 +0000 Subject: [PATCH 15/71] feat: Add raid5 stripe request structure. --- module/bdev/raid/raid5.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index f99b68a1999..a80f33e2d65 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -25,6 +25,18 @@ struct raid5_info { enum raid5_write_type write_type; }; +struct raid5_stripe_request { + struct raid_bdev_io *raid_io; + + struct iovec **strip_buffs; + + int* strip_buffs_cnts; + + int strip_buffs_cnt; + + int broken_strip_idx; +}; + struct raid5_io_buffer { struct raid_bdev_io *raid_io; From 6db2b6a3d5d85ca5ee6d3346b50b2a5fc8b3ce29 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:10:47 +0000 Subject: [PATCH 16/71] feat: Add function-helpers for raid5. 
--- module/bdev/raid/raid5.c | 105 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index a80f33e2d65..072eabb98dd 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -336,6 +336,111 @@ raid5_write_request_writing_complete_part(struct spdk_bdev_io *bdev_io, bool suc } } +static bool +raid5_check_io_boundaries(struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + uint64_t end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + + return (start_strip_idx <= end_strip_idx) && + (start_strip_idx / (raid_bdev->num_base_bdevs - 1) == + end_strip_idx / (raid_bdev->num_base_bdevs - 1)); +} + +static inline void +raid5_check_raid_ch(struct raid_bdev_io_channel *raid_ch) +{ + assert(raid_ch != NULL); + assert(raid_ch->base_channel != NULL); +} + +static uint64_t +raid5_start_strip_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev) +{ + uint64_t start_strip_idx; + uint64_t parity_strip_idx; + + start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + parity_strip_idx = raid5_parity_strip_index(raid_bdev, + start_strip_idx / (raid_bdev->num_base_bdevs - 1)); + start_strip_idx %= (raid_bdev->num_base_bdevs - 1); + start_strip_idx += 1 + parity_strip_idx; + return start_strip_idx >= raid_bdev->num_base_bdevs ? 
+ start_strip_idx - raid_bdev->num_base_bdevs : + start_strip_idx; +} + +static uint64_t +raid5_end_strip_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev) +{ + uint64_t end_strip_idx; + uint64_t parity_strip_idx; + + end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + parity_strip_idx = raid5_parity_strip_index(raid_bdev, + end_strip_idx / (raid_bdev->num_base_bdevs - 1)); + end_strip_idx %= (raid_bdev->num_base_bdevs - 1); + end_strip_idx += 1 + parity_strip_idx; + return end_strip_idx >= raid_bdev->num_base_bdevs ? + end_strip_idx - raid_bdev->num_base_bdevs : + end_strip_idx; +} + +static uint64_t +raid5_ofs_blcks(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev, uint64_t idx) +{ + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + + if (idx == ststrip_idx) { + return (((bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift) / + (raid_bdev->num_base_bdevs - 1)) << raid_bdev->strip_size_shift) + + (bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1)); + } else { + return ((bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift) / + (raid_bdev->num_base_bdevs - 1)) << raid_bdev->strip_size_shift; + } +} + +static uint64_t +raid5_num_blcks(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev, uint64_t idx) +{ + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t st_ofs = (bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1)); + + if (idx == ststrip_idx) { + if (bdev_io->u.bdev.num_blocks + st_ofs <= raid_bdev->strip_size) { + return bdev_io->u.bdev.num_blocks; + } else { + return raid_bdev->strip_size - st_ofs; + } + } else if (idx == estrip_idx) { + return ((bdev_io->u.bdev.num_blocks + st_ofs - 1) & + (raid_bdev->strip_size - 1)) + 1; + } else { + return raid_bdev->strip_size; + } +} + +static inline bool +raid5_is_req_strip(uint64_t 
ststrip_idx, uint64_t estrip_idx, uint64_t idx) { + return (ststrip_idx <= estrip_idx) ? + (ststrip_idx <= idx) && (idx <= estrip_idx) : + (ststrip_idx <= idx) || (idx <= estrip_idx); +} + +static inline uint64_t +raid5_next_idx(uint64_t curr, struct raid_bdev *raid_bdev) +{ + return (curr + 1) >= raid_bdev->num_base_bdevs ? + curr + 1 - raid_bdev->num_base_bdevs : + curr + 1; +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 52a1ad6dcb09cce3624b232a138ee4101e53cf8a Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:13:05 +0000 Subject: [PATCH 17/71] feat: Add xor iovs function for RAID5. --- module/bdev/raid/raid5.c | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 072eabb98dd..4227aba96c7 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -441,6 +441,64 @@ raid5_next_idx(uint64_t curr, struct raid_bdev *raid_bdev) curr + 1; } +static void +raid5_xor_iovs_with_iovs(struct iovec *xor_iovs, int xor_iovcnt, uint64_t xor_ofs_b8, + struct iovec *iovs, int iovcnt, uint64_t ofs_b8, + uint64_t num_b8) +{ + uint64_t *xb8; + uint64_t *b8; + uint64_t xofs8 = xor_ofs_b8; + uint64_t ofs8 = ofs_b8; + uint64_t xor_idx = 0; + uint64_t idx = 0; + + SPDK_ERRLOG("raid5_xor_iovs_with_iovs\n"); + + while (xofs8 >= xor_iovs[xor_idx].iov_len / 8) { + xofs8 -= xor_iovs[xor_idx].iov_len / 8; + ++xor_idx; + } + + while (ofs8 >= iovs[idx].iov_len / 8) { + ofs8 -= iovs[idx].iov_len / 8; + ++idx; + } + + while (num_b8 > 0) { + xb8 = xor_iovs[xor_idx].iov_base; + xb8 = &xb8[xofs8]; + b8 = iovs[idx].iov_base; + b8 = &b8[ofs8]; + if (xor_iovs[xor_idx].iov_len / 8 - xofs8 > + iovs[idx].iov_len / 8 - ofs8) { + for (uint64_t i = ofs8; i < (iovs[idx].iov_len / 8); ++i) { + xb8[i - ofs8 + xofs8] ^= b8[i]; + } + num_b8 -= iovs[idx].iov_len / 8 - ofs8; + ++idx; + ofs8 = 0; + } else if (xor_iovs[xor_idx].iov_len / 8 - xofs8 
< + iovs[idx].iov_len / 8 - ofs8) { + for (uint64_t i = xofs8; i < (xor_iovs[xor_idx].iov_len / 8); ++i) { + xb8[i] ^= b8[i - xofs8 + ofs8]; + } + num_b8 -= xor_iovs[xor_idx].iov_len / 8 - xofs8; + ++xor_idx; + xofs8 = 0; + } else { + for (uint64_t i = ofs8; i < (iovs[idx].iov_len / 8); ++i) { + xb8[i - ofs8 + xofs8] ^= b8[i]; + } + num_b8 -= iovs[idx].iov_len / 8 - ofs8; + ++idx; + ofs8 = 0; + ++xor_idx; + xofs8 = 0; + } + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 8dd25a0c9a737ae6f86d76e02f3dcd0137b5d761 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:20:37 +0000 Subject: [PATCH 18/71] feat: Add funs to allocate&free raid5 stripe req. --- module/bdev/raid/raid5.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 4227aba96c7..cdccbf09843 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -499,6 +499,42 @@ raid5_xor_iovs_with_iovs(struct iovec *xor_iovs, int xor_iovcnt, uint64_t xor_of } } +static struct raid5_stripe_request * +raid5_get_stripe_request(struct raid_bdev_io *raid_io) +{ + struct raid5_stripe_request *request; + + request = calloc(1, sizeof(struct raid5_stripe_request)); + if (request == NULL) { + return NULL; + } + + request->raid_io = raid_io; + request->strip_buffs_cnt = raid_io->raid_bdev->num_base_bdevs; + request->broken_strip_idx = raid_io->raid_bdev->num_base_bdevs; + request->strip_buffs = calloc(request->strip_buffs_cnt, sizeof(struct iovec *)); + if (request->strip_buffs == NULL) { + free(request); + return NULL; + } + + request->strip_buffs_cnts = calloc(request->strip_buffs_cnt, sizeof(int)); + if (request->strip_buffs_cnts == NULL) { + free(request->strip_buffs); + free(request); + return NULL; + } + + return request; +} + +static void +raid5_free_stripe_request(struct raid5_stripe_request *request) { + free(request->strip_buffs_cnts); + 
free(request->strip_buffs); + free(request); +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 9cd18c307cdb3efc68b42152781f664f9f5bbbe7 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:27:34 +0000 Subject: [PATCH 19/71] feat: Add funs to allocate&free raid5 strip buffs. --- module/bdev/raid/raid5.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index cdccbf09843..bd0ec771fe2 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -535,6 +535,44 @@ raid5_free_stripe_request(struct raid5_stripe_request *request) { free(request); } +static int +raid5_get_strips_buffs_until(struct raid5_stripe_request *request, + uint8_t start_idx, uint8_t until_idx, uint64_t num_blcks) +{ + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; + + SPDK_ERRLOG("raid5_get_strips_buffs_until\n"); + + for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { + request->strip_buffs_cnts[idx] = 1; + request->strip_buffs[idx] = raid5_get_buffer(num_blcks * block_size_b); + if (request->strip_buffs[idx] == NULL) { + for (uint8_t i = start_idx; i != idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_free_buffer(request->strip_buffs[i]); + request->strip_buffs_cnts[i] = 0; + } + request->strip_buffs_cnts[idx] = 0; + return -ENOMEM; + } + } + return 0; +} + +static void +raid5_free_strips_buffs_until(struct raid5_stripe_request *request, + uint8_t start_idx, uint8_t until_idx) +{ + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + + SPDK_ERRLOG("raid5_free_strips_buffs_until\n"); + + for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { + raid5_free_buffer(request->strip_buffs[idx]); + request->strip_buffs_cnts[idx] = 0; + } +} + static void 
raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 90b042412b434314705538c1b9b6427d47285a17 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:30:23 +0000 Subject: [PATCH 20/71] feat: Add funs to set&free req strip iovs. --- module/bdev/raid/raid5.c | 86 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index bd0ec771fe2..1cd169e9462 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -573,6 +573,92 @@ raid5_free_strips_buffs_until(struct raid5_stripe_request *request, } } +static int +raid5_set_req_strips_iovs_until(struct raid5_stripe_request *request, + uint8_t start_idx, uint8_t until_idx, + int *iov_idx, uint64_t *remaining_len) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint64_t num_blcks; + uint64_t len; + uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; + uint64_t *iov_base_b8; + int end_iov_idx; + + SPDK_ERRLOG("raid5_set_req_strips_iovs_until\n"); + + for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx); + end_iov_idx = *iov_idx; + len = *remaining_len; + + while ((len / block_size_b) < num_blcks) { + ++end_iov_idx; + len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; + } + + len = num_blcks * block_size_b; + + request->strip_buffs_cnts[idx] = end_iov_idx - *iov_idx + 1; + request->strip_buffs[idx] = calloc(request->strip_buffs_cnts[idx], sizeof(struct iovec)); + if (request->strip_buffs[idx] == NULL) { + for (uint8_t i = start_idx; i != idx; i = raid5_next_idx(i, raid_bdev)) { + free(request->strip_buffs[i]); + request->strip_buffs_cnts[i] = 0; + } + request->strip_buffs_cnts[idx] = 0; + return -ENOMEM; + } + + iov_base_b8 = 
bdev_io->u.bdev.iovs[*iov_idx].iov_base; + request->strip_buffs[idx][0].iov_base = + &iov_base_b8[(bdev_io->u.bdev.iovs[*iov_idx].iov_len - *remaining_len) / 8]; + if (*remaining_len >= num_blcks * block_size_b) { + request->strip_buffs[idx][0].iov_len = num_blcks * block_size_b; + len -= num_blcks * block_size_b; + *remaining_len -= num_blcks * block_size_b; + } else { + request->strip_buffs[idx][0].iov_len = *remaining_len; + len -= *remaining_len; + for (uint8_t i = *iov_idx + 1; i < end_iov_idx; ++i) { + request->strip_buffs[idx][i - *iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; + request->strip_buffs[idx][i - *iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; + len -= request->strip_buffs[idx][i - *iov_idx].iov_len; + } + request->strip_buffs[idx][request->strip_buffs_cnts[idx] - 1].iov_base = + bdev_io->u.bdev.iovs[end_iov_idx].iov_base; + request->strip_buffs[idx][request->strip_buffs_cnts[idx] - 1].iov_len = len; + *remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; + *iov_idx = end_iov_idx; + } + + if (*remaining_len == 0) { + ++(*iov_idx); + if (*iov_idx < bdev_io->u.bdev.iovcnt) { + *remaining_len = bdev_io->u.bdev.iovs[*iov_idx].iov_len; + } + } + } + return 0; +} + +static void +raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, + uint8_t start_idx, uint8_t until_idx) +{ + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + + SPDK_ERRLOG("raid5_free_req_strips_iovs_until\n"); + + for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { + free(request->strip_buffs[idx]); + request->strip_buffs[idx] = NULL; + request->strip_buffs_cnts[idx] = 0; + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From ed41e9b4ec04a9825b7d6fed80e1fa9e2fad97d5 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:50:46 +0000 Subject: [PATCH 21/71] feat: Add set&free strip buffs funs for read req. 
Add set&free strip buffs funs for read required strips case. --- module/bdev/raid/raid5.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 1cd169e9462..cb3323d45d9 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -659,6 +659,35 @@ raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, } } +static int +raid5_read_req_strips_set_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; + int iov_idx = 0; + + SPDK_ERRLOG("raid5_read_req_strips_set_strip_buffs\n"); + + return raid5_set_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx, &iov_idx, &remaining_len); +} + +static void +raid5_read_req_strips_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint8_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + + SPDK_ERRLOG("raid5_read_req_strips_free_strip_buffs\n"); + + raid5_free_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx); +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 4533bc7893455b50951bc97f1e21eb8f9600faac Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:55:12 +0000 Subject: [PATCH 22/71] feat: Add set&free strip buffs funs for read req. Add set&free strip buffs funs for read required strips except one case. 
--- module/bdev/raid/raid5.c | 250 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index cb3323d45d9..27647600a5a 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -688,6 +688,256 @@ raid5_read_req_strips_free_strip_buffs(struct raid5_stripe_request *request) raid5_free_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx); } +static int +raid5_read_exc_req_strip_set_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; + uint64_t len; + uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; + uint64_t *iov_base_b8; + uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + int end_iov_idx; + int iov_idx = 0; + int ret = 0; + int sts_idx_ofs = 0; + int es_idx_extra = 0; + + SPDK_ERRLOG("raid5_read_exc_req_strip_set_strip_buffs\n"); + + // not req strip + ret = raid5_get_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); + if (ret != 0) { + return ret; + } + + // start req strip + sts_idx_ofs = ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) ? 
+ 1 : 0; + + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); + end_iov_idx = iov_idx; + len = remaining_len; + + while ((len / block_size_b) < num_blcks) { + ++end_iov_idx; + len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; + } + + request->strip_buffs_cnts[sts_idx] = end_iov_idx - iov_idx + 1 + sts_idx_ofs; + request->strip_buffs[sts_idx] = calloc(request->strip_buffs_cnts[sts_idx], sizeof(struct iovec)); + if (request->strip_buffs[sts_idx] == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + request->strip_buffs_cnts[sts_idx] = 0; + return -ENOMEM; + } + + len = num_blcks * block_size_b; + + iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; + request->strip_buffs[sts_idx][sts_idx_ofs].iov_base = + &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; + + SPDK_ERRLOG("iov_base_b8: %llu\n", iov_base_b8); + SPDK_ERRLOG("idx: %lu\n", (bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8); + SPDK_ERRLOG("iov_base: %llu\n", request->strip_buffs[sts_idx][sts_idx_ofs].iov_base); + SPDK_ERRLOG("remaining len: %lu\n", remaining_len); + SPDK_ERRLOG("iov_idx: %d", iov_idx); + + if (remaining_len >= num_blcks * block_size_b) { + request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = num_blcks * block_size_b; + len -= num_blcks * block_size_b; + remaining_len -= num_blcks * block_size_b; + } else { + request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = remaining_len; + len -= remaining_len; + for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { + request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; + request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; + len -= request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len; + } + request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_base = + bdev_io->u.bdev.iovs[end_iov_idx].iov_base; + 
request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_len = len; + remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; + iov_idx = end_iov_idx; + } + + if (remaining_len == 0) { + ++iov_idx; + if (iov_idx < bdev_io->u.bdev.iovcnt) { + remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; + } + } + + if (sts_idx_ofs == 1) { + request->strip_buffs[sts_idx][0].iov_len = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) + - ofs_blcks) * block_size_b; + request->strip_buffs[sts_idx][0].iov_base = calloc(request->strip_buffs[sts_idx][0].iov_len, + sizeof(char)); + if (request->strip_buffs[sts_idx][0].iov_base == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + return -ENOMEM; + } + } + + if (sts_idx == es_idx) { + return 0; + } + + // middle req strip + ret = raid5_set_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx, + &iov_idx, &remaining_len); + if (ret != 0) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + + return ret; + } + + // end req strip + es_idx_extra = ofs_blcks + num_blcks > + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx) ? 
+ 1 : 0; + + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); + end_iov_idx = iov_idx; + len = remaining_len; + + while ((len / block_size_b) < num_blcks) { + ++end_iov_idx; + len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; + } + + request->strip_buffs_cnts[es_idx] = end_iov_idx - iov_idx + 1 + es_idx_extra; + request->strip_buffs[es_idx] = calloc(request->strip_buffs_cnts[es_idx], sizeof(struct iovec)); + if (request->strip_buffs[es_idx] == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + request->strip_buffs_cnts[es_idx] = 0; + return -ENOMEM; + } + + len = num_blcks * block_size_b; + + iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; + request->strip_buffs[es_idx][0].iov_base = + &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; + if (remaining_len >= num_blcks * block_size_b) { + request->strip_buffs[es_idx][0].iov_len = num_blcks * block_size_b; + len -= num_blcks * block_size_b; + remaining_len -= num_blcks * block_size_b; + } else { + request->strip_buffs[es_idx][0].iov_len = remaining_len; + len -= remaining_len; + for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { + request->strip_buffs[es_idx][i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; + request->strip_buffs[es_idx][i - iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; + len -= request->strip_buffs[es_idx][i - iov_idx].iov_len; + } + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = + bdev_io->u.bdev.iovs[end_iov_idx].iov_base; + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = len; + remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; + iov_idx = end_iov_idx; + } 
+ + if (remaining_len == 0) { + ++iov_idx; + if (iov_idx < bdev_io->u.bdev.iovcnt) { + remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; + } + } + + if (es_idx_extra == 1) { + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = + ofs_blcks + num_blcks - + (raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx)); + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = + calloc(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + - 1].iov_len, + sizeof(char)); + if (request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base + == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + free(request->strip_buffs[es_idx]); + request->strip_buffs[es_idx] = NULL; + request->strip_buffs_cnts[es_idx] = 0; + return -ENOMEM; + } + } + return 0; +} + +static void +raid5_read_exc_req_strip_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + + SPDK_ERRLOG("raid5_read_exc_req_strip_free_strip_buffs\n"); + + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + + if (ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx)) { + 
free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + if (sts_idx == es_idx) { + return; + } + + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + + if (ofs_blcks + num_blcks > raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx)) { + free(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + - 1].iov_base); + } + free(request->strip_buffs[es_idx]); + request->strip_buffs[es_idx] = NULL; + request->strip_buffs_cnts[es_idx] = 0; +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From d94fdd545108acc72701b7d77d06d3d2cc05070d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 12:57:14 +0000 Subject: [PATCH 23/71] feat: Add raid5 stripe req completion. --- module/bdev/raid/raid5.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 27647600a5a..f32affb3b98 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -938,6 +938,16 @@ raid5_read_exc_req_strip_free_strip_buffs(struct raid5_stripe_request *request) request->strip_buffs_cnts[es_idx] = 0; } +static void +raid5_stripe_req_complete(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + + raid5_free_stripe_request(request); + + raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 7f4735c617fab20bf5f9e45589d43a177d8b41ee Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 13:02:38 +0000 Subject: [PATCH 24/71] feat: Add raid5 read complete part final callback. 
--- module/bdev/raid/raid5.c | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index f32affb3b98..b6024b97043 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -948,6 +948,83 @@ raid5_stripe_req_complete(struct raid5_stripe_request *request) raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); } +static bool +raid5_read_complete_part_final(struct raid5_stripe_request *request, uint64_t completed, + enum spdk_bdev_io_status status) +{ + struct raid_bdev_io *raid_io = request->raid_io; + + SPDK_ERRLOG("raid5_read_complete_part_final\n"); + + assert(raid_io->base_bdev_io_remaining >= completed); + raid_io->base_bdev_io_remaining -= completed; + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid_io->base_bdev_io_status = status; + } + + if (raid_io->base_bdev_io_remaining == 0) { + if (request->broken_strip_idx != raid_io->raid_bdev->num_base_bdevs) { + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + uint64_t block_size_b8 = ((uint64_t)128 * raid_bdev->strip_size_kb) + / raid_bdev->strip_size; + uint64_t br_ofs_b8 = block_size_b8 * raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + uint64_t br_num_b8 = block_size_b8 * raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + uint64_t ofs_b8; + uint8_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint8_t after_broken = raid5_next_idx(request->broken_strip_idx, raid_bdev); + raid5_fill_iovs_with_zeroes(request->strip_buffs[request->broken_strip_idx], + request->strip_buffs_cnts[request->broken_strip_idx]); + + for (uint8_t i = after_es_idx; i != sts_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[request->broken_strip_idx], + 
request->strip_buffs_cnts[request->broken_strip_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + br_num_b8); + } + + for (uint8_t i = sts_idx; i != request->broken_strip_idx; i = raid5_next_idx(i, raid_bdev)) { + ofs_b8 = block_size_b8 * raid5_ofs_blcks(bdev_io, raid_bdev, i); + if (br_ofs_b8 >= ofs_b8) { + ofs_b8 = br_ofs_b8 - ofs_b8; + } else { + ofs_b8 = 0; + } + raid5_xor_iovs_with_iovs(request->strip_buffs[request->broken_strip_idx], + request->strip_buffs_cnts[request->broken_strip_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + br_num_b8); + } + + for (uint8_t i = after_broken; i != after_es_idx; i = raid5_next_idx(i, raid_bdev)) { + ofs_b8 = block_size_b8 * raid5_ofs_blcks(bdev_io, raid_bdev, i); + if (br_ofs_b8 >= ofs_b8) { + ofs_b8 = br_ofs_b8 - ofs_b8; + } else { + ofs_b8 = 0; + } + raid5_xor_iovs_with_iovs(request->strip_buffs[request->broken_strip_idx], + request->strip_buffs_cnts[request->broken_strip_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + br_num_b8); + } + + raid5_read_exc_req_strip_free_strip_buffs(request); + assert(0); + } else { + raid5_read_req_strips_free_strip_buffs(request); + } + + raid5_stripe_req_complete(request); + return true; + } else { + return false; + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 47059783c6aba1d8c5dc19f4a6432263c0ada097 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 13:03:41 +0000 Subject: [PATCH 25/71] feat: Add raid5 read complete part callback. 
--- module/bdev/raid/raid5.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index b6024b97043..c8998efe137 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1025,6 +1025,18 @@ raid5_read_complete_part_final(struct raid5_stripe_request *request, uint64_t co } } +static void raid5_read_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + struct raid5_stripe_request *request = cb_arg; + + SPDK_ERRLOG("raid5_read_complete_part\n"); + + spdk_bdev_free_io(bdev_io); + + raid5_read_complete_part_final(request, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 76492b8babd72779f6766d5a6b6997848089bc7e Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 14:34:14 +0000 Subject: [PATCH 26/71] feat: Add reading required strips for RAID5. --- module/bdev/raid/raid5.c | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index c8998efe137..b3584841c9a 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1037,6 +1037,67 @@ static void raid5_read_complete_part(struct spdk_bdev_io *bdev_io, bool success, SPDK_BDEV_IO_STATUS_FAILED); } +static void raid5_read_req_strips(struct raid5_stripe_request *request); + +static void +_raid5_read_req_strips(void *cb_arg) +{ + struct raid5_stripe_request *request = cb_arg; + raid5_read_req_strips(request); +} + +static void +raid5_read_req_strips(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t 
ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t base_bdev_io_not_submitted; + uint8_t after_estrip_idx = raid5_next_idx(estrip_idx, raid_bdev); + uint8_t start_idx; + int ret = 0; + + SPDK_ERRLOG("raid5_read_req_strips\n"); + + start_idx = (ststrip_idx + raid_io->base_bdev_io_submitted) > raid_bdev->num_base_bdevs ? + ststrip_idx + raid_io->base_bdev_io_submitted - raid_bdev->num_base_bdevs : + ststrip_idx + raid_io->base_bdev_io_submitted; + + for (uint8_t idx = start_idx; idx != after_estrip_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + raid5_ofs_blcks(bdev_io, raid_bdev, idx), + raid5_num_blcks(bdev_io, raid_bdev, idx), + raid5_read_complete_part, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_read_req_strips, request); + return; + } + + base_bdev_io_not_submitted = ((estrip_idx + raid_bdev->num_base_bdevs) - + ststrip_idx) % raid_bdev->num_base_bdevs + 1 - + raid_io->base_bdev_io_submitted; + raid5_read_complete_part_final(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 8c98007929599300e5018ab3bebe2f5d73cd0af2 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 14:37:23 +0000 Subject: [PATCH 27/71] feat: Add reading with broken required strip case. 
--- module/bdev/raid/raid5.c | 72 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index b3584841c9a..260a912f024 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1098,6 +1098,78 @@ raid5_read_req_strips(struct raid5_stripe_request *request) } } +static void raid5_read_except_one_req_strip(struct raid5_stripe_request *request); + +static void +_raid5_read_except_one_req_strip(void *cb_arg) +{ + struct raid5_stripe_request *request = cb_arg; + raid5_read_except_one_req_strip(request); +} + +static void +raid5_read_except_one_req_strip(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t base_bdev_io_not_submitted; + uint64_t ofs_blcks; + uint64_t num_blcks; + uint8_t after_brstrip_idx = raid5_next_idx(request->broken_strip_idx, raid_bdev); + uint8_t start_idx; + int ret = 0; + + SPDK_ERRLOG("raid5_read_except_one_req_strip\n"); + + start_idx = (after_brstrip_idx + raid_io->base_bdev_io_submitted) > raid_bdev->num_base_bdevs ? 
+ after_brstrip_idx + raid_io->base_bdev_io_submitted - raid_bdev->num_base_bdevs : + after_brstrip_idx + raid_io->base_bdev_io_submitted; + + for (uint8_t idx = start_idx; idx != request->broken_strip_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); + + if (raid5_is_req_strip(ststrip_idx, estrip_idx, idx)) { + if (ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, idx)) { + num_blcks = raid_bdev->strip_size; + ofs_blcks = spdk_min(ofs_blcks, raid5_ofs_blcks(bdev_io, raid_bdev, idx)); + } else { + num_blcks = spdk_max(num_blcks, raid5_num_blcks(bdev_io, raid_bdev, idx)); + } + } + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_read_complete_part, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_read_except_one_req_strip, request); + return; + } + + base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 1 - + raid_io->base_bdev_io_submitted; + raid5_read_complete_part_final(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 725f9bdf485cdfdbfe072bcc01c7aeba2b374e7e Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 14:49:03 +0000 Subject: [PATCH 28/71] feat: Change read request logic. 
--- module/bdev/raid/raid5.c | 248 +++++++++++++++++++-------------------- 1 file changed, 118 insertions(+), 130 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 260a912f024..618840513f1 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1170,153 +1170,85 @@ raid5_read_except_one_req_strip(struct raid5_stripe_request *request) } } -static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); - static void -_raid5_submit_rw_request(void *_raid_io) +raid5_submit_read_request(struct raid5_stripe_request *request) { - struct raid_bdev_io *raid_io = _raid_io; - - raid5_submit_rw_request(raid_io); -} - -static void -raid5_submit_read_request(struct raid_bdev_io *raid_io) -{ - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - struct spdk_bdev_ext_io_opts io_opts = {}; + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; - struct raid_bdev *raid_bdev = raid_io->raid_bdev; - uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; - uint64_t stripe_index; - uint64_t parity_strip_idx; - uint64_t req_bdev_idx; - uint32_t offset_in_strip; - uint64_t offset_blocks; - uint64_t num_blocks; - int ret = 0; - uint64_t start_strip_idx; - uint64_t end_strip_idx; - struct raid_base_bdev_info *base_info; struct spdk_io_channel *base_ch; + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_estrip_idx = raid5_next_idx(estrip_idx, raid_bdev); + int ret = 0; - start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> - raid_bdev->strip_size_shift; - if (start_strip_idx != end_strip_idx) { - SPDK_ERRLOG("I/O 
spans strip boundary!\n"); - assert(false); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - return; - } - - assert(raid_ch != NULL); - assert(raid_ch->base_channel); - - io_opts.size = sizeof(io_opts); - io_opts.memory_domain = bdev_io->u.bdev.memory_domain; - io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; - io_opts.metadata = bdev_io->u.bdev.md_buf; + SPDK_ERRLOG("raid5_submit_read_request"); - stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); - parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); - offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); + for (uint8_t idx = ststrip_idx; idx != after_estrip_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_ch = raid_ch->base_channel[idx]; - req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); - if (req_bdev_idx >= parity_strip_idx) { - ++req_bdev_idx; + if (base_ch == NULL) { + if (request->broken_strip_idx == raid_bdev->num_base_bdevs) { + request->broken_strip_idx = idx; + } else { + SPDK_ERRLOG("RAID5 read request: 2 broken strips\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + assert(false); + return; + } + } } - offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; - num_blocks = bdev_io->u.bdev.num_blocks; - - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - - if (base_ch != NULL) { - // case: reading only one strip - - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - if (ret == -ENOMEM) { - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } else if (ret != 0) { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - 
raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + if (request->broken_strip_idx == raid_bdev->num_base_bdevs) { + ret = raid5_read_req_strips_set_strip_buffs(request); + if (spdk_unlikely(ret != 0)) { + SPDK_ERRLOG("RAID5 read request: allocation of buffers is failed\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + return; } - } else { - // case: broken request strip - uint8_t start_idx; + raid_io->base_bdev_io_submitted = 0; + raid_io->base_bdev_io_remaining = ((estrip_idx + raid_bdev->num_base_bdevs) - + ststrip_idx) % raid_bdev->num_base_bdevs + 1; + raid5_read_req_strips(request); + } else { + for (uint8_t idx = after_estrip_idx; idx != ststrip_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_ch = raid_ch->base_channel[idx]; - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; - raid5_fill_iovs_with_zeroes(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt); + if (base_ch == NULL) { + SPDK_ERRLOG("RAID5 read request: 2 broken strips\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + assert(false); + return; + } } - start_idx = raid_io->base_bdev_io_submitted; - if (req_bdev_idx <= start_idx) { - start_idx++; + ret = raid5_read_exc_req_strip_set_strip_buffs(request); + if (spdk_unlikely(ret != 0)) { + SPDK_ERRLOG("RAID5 read request: allocation of buffers is failed\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + return; } - for (uint8_t idx = start_idx; idx < raid_bdev->num_base_bdevs; ++idx) { - struct raid5_io_buffer *io_buffer; - - base_info = &raid_bdev->base_bdev_info[idx]; - base_ch = raid_ch->base_channel[idx]; - if (base_ch == NULL) { - if (idx == req_bdev_idx) { - continue; - } else { - SPDK_ERRLOG("2 broken strips\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - 
raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - - (raid_bdev->num_base_bdevs - 1); - if (raid_io->base_bdev_io_remaining == 0) { - raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - return; - } - } + raid_io->base_bdev_io_submitted = 0; + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; + raid5_read_except_one_req_strip(request); + } +} - io_buffer = raid5_get_io_buffer(raid_io, num_blocks * block_size_b); - if (io_buffer == NULL) { - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - return; - } - - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - io_buffer->buffer, 1, - offset_blocks, num_blocks, raid5_read_request_complete_part, - io_buffer, &io_opts); +static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); - if (ret != 0) { - raid5_free_io_buffer(io_buffer); - if (ret == -ENOMEM) { - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - - (raid_bdev->num_base_bdevs - 1); - if (raid_io->base_bdev_io_remaining == 0) { - raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - } - return; - } +static void +_raid5_submit_rw_request(void *_raid_io) +{ + struct raid_bdev_io *raid_io = _raid_io; - raid_io->base_bdev_io_submitted++; - } - } + raid5_submit_rw_request(raid_io); } static void raid5_submit_write_request_reading(struct raid5_io_buffer *wr_xor_buff); @@ -1736,11 +1668,67 @@ raid5_submit_write_request(struct raid_bdev_io *raid_io) static void raid5_submit_rw_request(struct raid_bdev_io *raid_io) { - struct spdk_bdev_io *bdev_io = 
spdk_bdev_io_from_ctx(raid_io); + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid5_info *r5_info = raid_bdev->module_private; + struct raid5_stripe_request *request; + + // debug logs + + SPDK_ERRLOG("iovcnt: %d\n", bdev_io->u.bdev.iovcnt); + + for (int i = 0; i < bdev_io->u.bdev.iovcnt; ++i) { + SPDK_ERRLOG("iovec #%d, addr:%llu, len: %lu\n", + i, + bdev_io->u.bdev.iovs[i].iov_base, + bdev_io->u.bdev.iovs[i].iov_len); + } + + uint64_t start_strip; + uint64_t end_strip; + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_estrip_idx = raid5_next_idx(estrip_idx, raid_bdev); + + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + + SPDK_ERRLOG("start_strip: %lu\nend_strip: %lu\nststrip_idx: %lu\nestrip_idx: %lu\nafter_estrip_idx: %lu\n", + start_strip, + end_strip, + ststrip_idx, + estrip_idx, + after_estrip_idx); + + // end debug logs + + if (!raid5_check_io_boundaries(raid_io)) { + SPDK_ERRLOG("RAID5: I/O spans stripe boundaries!\n"); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + assert(false); + return; + } + + raid5_check_raid_ch(raid_ch); + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && r5_info->write_type == UNDEFINED) { + SPDK_ERRLOG("RAID5: write type is undefinied\n"); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + request = raid5_get_stripe_request(raid_io); + if (request == NULL) { + SPDK_ERRLOG("RAID5: allocation of stripe request is failed\n"); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } switch (bdev_io->type) { case SPDK_BDEV_IO_TYPE_READ: - raid5_submit_read_request(raid_io); + 
raid5_submit_read_request(request); break; case SPDK_BDEV_IO_TYPE_WRITE: raid5_submit_write_request(raid_io); From 11d67e33d407d1bd5a5bdfcca47086d7ed221e7c Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 14:55:22 +0000 Subject: [PATCH 29/71] fix: Change memory_domains_supported to false. The current RAID5 implementation doesn't support memory domains. --- module/bdev/raid/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 618840513f1..fc108f05d6a 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1968,7 +1968,7 @@ raid5_resize(struct raid_bdev *raid_bdev) static struct raid_bdev_module g_raid5_module = { .level = RAID5, .base_bdevs_min = 3, - .memory_domains_supported = true, + .memory_domains_supported = false, .start = raid5_start, .submit_rw_request = raid5_submit_rw_request, .resize = raid5_resize From 7bb449bbf3751cd628dd33573b1753925095d0a3 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 14:58:53 +0000 Subject: [PATCH 30/71] fix: Add stripe request completion. Add stripe request completion for incorrect I/O type. 
--- module/bdev/raid/raid5.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index fc108f05d6a..226d883d5f2 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1734,8 +1734,9 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) raid5_submit_write_request(raid_io); break; default: - SPDK_ERRLOG("Invalid request type"); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + SPDK_ERRLOG("RAID5: Invalid request type"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); assert(false); } } From 17890bfbd744ef6fc4b0d797ea0917c7523d3e4a Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Thu, 23 Nov 2023 15:02:42 +0000 Subject: [PATCH 31/71] fix: Add failed completion on submit write request --- module/bdev/raid/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 226d883d5f2..281d99ba1e1 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1731,7 +1731,8 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) raid5_submit_read_request(request); break; case SPDK_BDEV_IO_TYPE_WRITE: - raid5_submit_write_request(raid_io); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; + raid5_stripe_req_complete(request); break; default: SPDK_ERRLOG("RAID5: Invalid request type"); From 400e6fcb6256f0fa4cc36fa6899c44d4a181a80e Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 1 Dec 2023 01:27:28 +0000 Subject: [PATCH 32/71] feat: Add fns of setting&freeing all strip buffs. Add functions for setting and freeing all strip buffers of a stripe request. 
--- module/bdev/raid/raid5.c | 247 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 281d99ba1e1..966ea0292d5 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -659,6 +659,253 @@ raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, } } +static int +raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blcks, uint64_t num_blcks) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; + uint64_t len; + uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; + uint64_t *iov_base_b8; + uint64_t blocks; + int end_iov_idx; + int iov_idx = 0; + int ret = 0; + int sts_idx_ofs = 0; + int es_idx_extra = 0; + + SPDK_ERRLOG("raid5_set_all_strip_buffs\n"); + + // not req strip + ret = raid5_get_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); + if (ret != 0) { + return ret; + } + + // start req strip + sts_idx_ofs = ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) ? 
+ 1 : 0; + + blocks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); + end_iov_idx = iov_idx; + len = remaining_len; + + while ((len / block_size_b) < blocks) { + ++end_iov_idx; + len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; + } + + request->strip_buffs_cnts[sts_idx] = end_iov_idx - iov_idx + 1 + sts_idx_ofs; + request->strip_buffs[sts_idx] = calloc(request->strip_buffs_cnts[sts_idx], sizeof(struct iovec)); + if (request->strip_buffs[sts_idx] == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + request->strip_buffs_cnts[sts_idx] = 0; + return -ENOMEM; + } + + len = blocks * block_size_b; + + iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; + request->strip_buffs[sts_idx][sts_idx_ofs].iov_base = + &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; + + SPDK_ERRLOG("iov_base_b8: %llu\n", iov_base_b8); + SPDK_ERRLOG("idx: %lu\n", (bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8); + SPDK_ERRLOG("iov_base: %llu\n", request->strip_buffs[sts_idx][sts_idx_ofs].iov_base); + SPDK_ERRLOG("remaining len: %lu\n", remaining_len); + SPDK_ERRLOG("iov_idx: %d", iov_idx); + + if (remaining_len >= blocks * block_size_b) { + request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = blocks * block_size_b; + len -= blocks * block_size_b; + remaining_len -= blocks * block_size_b; + } else { + request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = remaining_len; + len -= remaining_len; + for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { + request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; + request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; + len -= request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len; + } + request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_base = + bdev_io->u.bdev.iovs[end_iov_idx].iov_base; + request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_len = 
len; + remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; + iov_idx = end_iov_idx; + } + + if (remaining_len == 0) { + ++iov_idx; + if (iov_idx < bdev_io->u.bdev.iovcnt) { + remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; + } + } + + if (sts_idx_ofs == 1) { + request->strip_buffs[sts_idx][0].iov_len = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) + - ofs_blcks) * block_size_b; + request->strip_buffs[sts_idx][0].iov_base = calloc(request->strip_buffs[sts_idx][0].iov_len, + sizeof(char)); + if (request->strip_buffs[sts_idx][0].iov_base == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + return -ENOMEM; + } + } + + if (sts_idx == es_idx) { + return 0; + } + + // middle req strip + ret = raid5_set_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx, + &iov_idx, &remaining_len); + if (ret != 0) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + + return ret; + } + + // end req strip + es_idx_extra = ofs_blcks + num_blcks > + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx) ? 
+ 1 : 0; + + blocks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); + end_iov_idx = iov_idx; + len = remaining_len; + + while ((len / block_size_b) < blocks) { + ++end_iov_idx; + len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; + } + + request->strip_buffs_cnts[es_idx] = end_iov_idx - iov_idx + 1 + es_idx_extra; + request->strip_buffs[es_idx] = calloc(request->strip_buffs_cnts[es_idx], sizeof(struct iovec)); + if (request->strip_buffs[es_idx] == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + request->strip_buffs_cnts[es_idx] = 0; + return -ENOMEM; + } + + len = blocks * block_size_b; + + iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; + request->strip_buffs[es_idx][0].iov_base = + &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; + if (remaining_len >= blocks * block_size_b) { + request->strip_buffs[es_idx][0].iov_len = blocks * block_size_b; + len -= blocks * block_size_b; + remaining_len -= blocks * block_size_b; + } else { + request->strip_buffs[es_idx][0].iov_len = remaining_len; + len -= remaining_len; + for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { + request->strip_buffs[es_idx][i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; + request->strip_buffs[es_idx][i - iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; + len -= request->strip_buffs[es_idx][i - iov_idx].iov_len; + } + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = + bdev_io->u.bdev.iovs[end_iov_idx].iov_base; + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = len; + remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; + iov_idx = end_iov_idx; + } + + if 
(remaining_len == 0) { + ++iov_idx; + if (iov_idx < bdev_io->u.bdev.iovcnt) { + remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; + } + } + + if (es_idx_extra == 1) { + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = + ofs_blcks + num_blcks - + (raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx)); + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = + calloc(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + - 1].iov_len, + sizeof(char)); + if (request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base + == NULL) { + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + if (sts_idx_ofs == 1) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + free(request->strip_buffs[es_idx]); + request->strip_buffs[es_idx] = NULL; + request->strip_buffs_cnts[es_idx] = 0; + return -ENOMEM; + } + } + return 0; +} + +static void +raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blcks, uint64_t num_blcks) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + + SPDK_ERRLOG("raid5_free_all_strip_buffs\n"); + + raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); + + if (ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx)) { + free(request->strip_buffs[sts_idx][0].iov_base); + } + free(request->strip_buffs[sts_idx]); + request->strip_buffs[sts_idx] = NULL; + request->strip_buffs_cnts[sts_idx] = 0; + if (sts_idx == 
es_idx) { + return; + } + + raid5_free_req_strips_iovs_until(request, + raid5_next_idx(sts_idx, raid_bdev), es_idx); + + if (ofs_blcks + num_blcks > raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + + raid5_num_blcks(bdev_io, raid_bdev, es_idx)) { + free(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + - 1].iov_base); + } + free(request->strip_buffs[es_idx]); + request->strip_buffs[es_idx] = NULL; + request->strip_buffs_cnts[es_idx] = 0; +} + static int raid5_read_req_strips_set_strip_buffs(struct raid5_stripe_request *request) { From 4d53c4ce9468bf08b1a7b684c78330636a1f337d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 1 Dec 2023 01:34:31 +0000 Subject: [PATCH 33/71] refactor: Change setting&freeing strip buffs. Change setting&freeing buffs for reading of req strips except broken. --- module/bdev/raid/raid5.c | 228 +-------------------------------------- 1 file changed, 2 insertions(+), 226 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 966ea0292d5..52c6a1c3002 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -940,211 +940,12 @@ raid5_read_exc_req_strip_set_strip_buffs(struct raid5_stripe_request *request) { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; - uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); - uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); - uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); - uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; - uint64_t len; - uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; - uint64_t *iov_base_b8; uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); - int end_iov_idx; - int iov_idx = 0; - int ret = 0; - int sts_idx_ofs = 0; - int es_idx_extra = 0; 
SPDK_ERRLOG("raid5_read_exc_req_strip_set_strip_buffs\n"); - // not req strip - ret = raid5_get_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); - if (ret != 0) { - return ret; - } - - // start req strip - sts_idx_ofs = ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) ? - 1 : 0; - - num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); - end_iov_idx = iov_idx; - len = remaining_len; - - while ((len / block_size_b) < num_blcks) { - ++end_iov_idx; - len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; - } - - request->strip_buffs_cnts[sts_idx] = end_iov_idx - iov_idx + 1 + sts_idx_ofs; - request->strip_buffs[sts_idx] = calloc(request->strip_buffs_cnts[sts_idx], sizeof(struct iovec)); - if (request->strip_buffs[sts_idx] == NULL) { - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - request->strip_buffs_cnts[sts_idx] = 0; - return -ENOMEM; - } - - len = num_blcks * block_size_b; - - iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; - request->strip_buffs[sts_idx][sts_idx_ofs].iov_base = - &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; - - SPDK_ERRLOG("iov_base_b8: %llu\n", iov_base_b8); - SPDK_ERRLOG("idx: %lu\n", (bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8); - SPDK_ERRLOG("iov_base: %llu\n", request->strip_buffs[sts_idx][sts_idx_ofs].iov_base); - SPDK_ERRLOG("remaining len: %lu\n", remaining_len); - SPDK_ERRLOG("iov_idx: %d", iov_idx); - - if (remaining_len >= num_blcks * block_size_b) { - request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = num_blcks * block_size_b; - len -= num_blcks * block_size_b; - remaining_len -= num_blcks * block_size_b; - } else { - request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = remaining_len; - len -= remaining_len; - for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { - request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; - request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len = 
bdev_io->u.bdev.iovs[i].iov_len; - len -= request->strip_buffs[sts_idx][sts_idx_ofs + i - iov_idx].iov_len; - } - request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_base = - bdev_io->u.bdev.iovs[end_iov_idx].iov_base; - request->strip_buffs[sts_idx][request->strip_buffs_cnts[sts_idx] - 1].iov_len = len; - remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; - iov_idx = end_iov_idx; - } - - if (remaining_len == 0) { - ++iov_idx; - if (iov_idx < bdev_io->u.bdev.iovcnt) { - remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; - } - } - - if (sts_idx_ofs == 1) { - request->strip_buffs[sts_idx][0].iov_len = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - - ofs_blcks) * block_size_b; - request->strip_buffs[sts_idx][0].iov_base = calloc(request->strip_buffs[sts_idx][0].iov_len, - sizeof(char)); - if (request->strip_buffs[sts_idx][0].iov_base == NULL) { - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - free(request->strip_buffs[sts_idx]); - request->strip_buffs[sts_idx] = NULL; - request->strip_buffs_cnts[sts_idx] = 0; - return -ENOMEM; - } - } - - if (sts_idx == es_idx) { - return 0; - } - - // middle req strip - ret = raid5_set_req_strips_iovs_until(request, - raid5_next_idx(sts_idx, raid_bdev), es_idx, - &iov_idx, &remaining_len); - if (ret != 0) { - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); - } - free(request->strip_buffs[sts_idx]); - request->strip_buffs[sts_idx] = NULL; - request->strip_buffs_cnts[sts_idx] = 0; - - return ret; - } - - // end req strip - es_idx_extra = ofs_blcks + num_blcks > - raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + - raid5_num_blcks(bdev_io, raid_bdev, es_idx) ? 
- 1 : 0; - - num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); - end_iov_idx = iov_idx; - len = remaining_len; - - while ((len / block_size_b) < num_blcks) { - ++end_iov_idx; - len += bdev_io->u.bdev.iovs[end_iov_idx].iov_len; - } - - request->strip_buffs_cnts[es_idx] = end_iov_idx - iov_idx + 1 + es_idx_extra; - request->strip_buffs[es_idx] = calloc(request->strip_buffs_cnts[es_idx], sizeof(struct iovec)); - if (request->strip_buffs[es_idx] == NULL) { - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); - } - free(request->strip_buffs[sts_idx]); - request->strip_buffs[sts_idx] = NULL; - request->strip_buffs_cnts[sts_idx] = 0; - raid5_free_req_strips_iovs_until(request, - raid5_next_idx(sts_idx, raid_bdev), es_idx); - request->strip_buffs_cnts[es_idx] = 0; - return -ENOMEM; - } - - len = num_blcks * block_size_b; - - iov_base_b8 = bdev_io->u.bdev.iovs[iov_idx].iov_base; - request->strip_buffs[es_idx][0].iov_base = - &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; - if (remaining_len >= num_blcks * block_size_b) { - request->strip_buffs[es_idx][0].iov_len = num_blcks * block_size_b; - len -= num_blcks * block_size_b; - remaining_len -= num_blcks * block_size_b; - } else { - request->strip_buffs[es_idx][0].iov_len = remaining_len; - len -= remaining_len; - for (uint8_t i = iov_idx + 1; i < end_iov_idx; ++i) { - request->strip_buffs[es_idx][i - iov_idx].iov_base = bdev_io->u.bdev.iovs[i].iov_base; - request->strip_buffs[es_idx][i - iov_idx].iov_len = bdev_io->u.bdev.iovs[i].iov_len; - len -= request->strip_buffs[es_idx][i - iov_idx].iov_len; - } - request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = - bdev_io->u.bdev.iovs[end_iov_idx].iov_base; - request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = len; - remaining_len = bdev_io->u.bdev.iovs[end_iov_idx].iov_len - len; - iov_idx = end_iov_idx; - } 
- - if (remaining_len == 0) { - ++iov_idx; - if (iov_idx < bdev_io->u.bdev.iovcnt) { - remaining_len = bdev_io->u.bdev.iovs[iov_idx].iov_len; - } - } - - if (es_idx_extra == 1) { - request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = - ofs_blcks + num_blcks - - (raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + - raid5_num_blcks(bdev_io, raid_bdev, es_idx)); - request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = - calloc(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - - 1].iov_len, - sizeof(char)); - if (request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base - == NULL) { - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); - } - free(request->strip_buffs[sts_idx]); - request->strip_buffs[sts_idx] = NULL; - request->strip_buffs_cnts[sts_idx] = 0; - raid5_free_req_strips_iovs_until(request, - raid5_next_idx(sts_idx, raid_bdev), es_idx); - free(request->strip_buffs[es_idx]); - request->strip_buffs[es_idx] = NULL; - request->strip_buffs_cnts[es_idx] = 0; - return -ENOMEM; - } - } - return 0; + return raid5_set_all_strip_buffs(request, ofs_blcks, num_blcks); } static void @@ -1152,37 +953,12 @@ raid5_read_exc_req_strip_free_strip_buffs(struct raid5_stripe_request *request) { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; - uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); - uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); - uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); SPDK_ERRLOG("raid5_read_exc_req_strip_free_strip_buffs\n"); - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - - if (ofs_blcks != 
raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx)) { - free(request->strip_buffs[sts_idx][0].iov_base); - } - free(request->strip_buffs[sts_idx]); - request->strip_buffs[sts_idx] = NULL; - request->strip_buffs_cnts[sts_idx] = 0; - if (sts_idx == es_idx) { - return; - } - - raid5_free_req_strips_iovs_until(request, - raid5_next_idx(sts_idx, raid_bdev), es_idx); - - if (ofs_blcks + num_blcks > raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + - raid5_num_blcks(bdev_io, raid_bdev, es_idx)) { - free(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - - 1].iov_base); - } - free(request->strip_buffs[es_idx]); - request->strip_buffs[es_idx] = NULL; - request->strip_buffs_cnts[es_idx] = 0; + raid5_free_all_strip_buffs(request, ofs_blcks, num_blcks); } static void From 984e5d064f2491f37b2f92221540e4122c01261f Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:20:34 +0000 Subject: [PATCH 34/71] feat: Add fn of calculation stripe idx. --- module/bdev/raid/raid5.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 52c6a1c3002..8b4aad1538f 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -357,6 +357,14 @@ raid5_check_raid_ch(struct raid_bdev_io_channel *raid_ch) assert(raid_ch->base_channel != NULL); } +static uint64_t +raid5_stripe_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev) +{ + uint64_t start_strip_idx = bdev_io->u.bdev.offset_blocks >> + raid_bdev->strip_size_shift; + return start_strip_idx / (raid_bdev->num_base_bdevs - 1); +} + static uint64_t raid5_start_strip_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev) { From 2f2b5aae4f0ac0ee805ce6d38b4c62d5ca8206c8 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:24:56 +0000 Subject: [PATCH 35/71] refactor: Rename fn of buffs allocating. 
--- module/bdev/raid/raid5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 8b4aad1538f..685ece6a28e 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -544,13 +544,13 @@ raid5_free_stripe_request(struct raid5_stripe_request *request) { } static int -raid5_get_strips_buffs_until(struct raid5_stripe_request *request, +raid5_allocate_strips_buffs_until(struct raid5_stripe_request *request, uint8_t start_idx, uint8_t until_idx, uint64_t num_blcks) { struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; - SPDK_ERRLOG("raid5_get_strips_buffs_until\n"); + SPDK_ERRLOG("raid5_allocate_strips_buffs_until\n"); for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { request->strip_buffs_cnts[idx] = 1; @@ -689,7 +689,7 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc SPDK_ERRLOG("raid5_set_all_strip_buffs\n"); // not req strip - ret = raid5_get_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); + ret = raid5_allocate_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); if (ret != 0) { return ret; } From 56adf6b7783f481729be78673e517fc2541fd969 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:27:09 +0000 Subject: [PATCH 36/71] feat: Add fn of setting all req strips iovs. 
--- module/bdev/raid/raid5.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 685ece6a28e..1fcc4535f62 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -667,6 +667,32 @@ raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, } } +static int +raid5_set_all_req_strips_iovs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; + int iov_idx = 0; + + SPDK_ERRLOG("raid5_set_all_req_strips_iovs\n"); + return raid5_set_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx, &iov_idx, &remaining_len); +} + +static void +raid5_free_all_req_strips_iovs(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + uint8_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + + raid5_free_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx); +} + static int raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blcks, uint64_t num_blcks) { From 90a9a95bd30c1ab9ca206fbd799ef5d44048ad05 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:28:50 +0000 Subject: [PATCH 37/71] fix: Fix comment. 
--- module/bdev/raid/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 1fcc4535f62..805d9371330 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -714,7 +714,7 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc SPDK_ERRLOG("raid5_set_all_strip_buffs\n"); - // not req strip + // not req strip and parity strip ret = raid5_allocate_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); if (ret != 0) { return ret; From 0bd2d719ead29d9603fc16a4ecac5318ef7c880d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:30:34 +0000 Subject: [PATCH 38/71] fix: Fix calculation of data length. --- module/bdev/raid/raid5.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 805d9371330..4bc8a73068f 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -877,9 +877,10 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc if (es_idx_extra == 1) { request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len = - ofs_blcks + num_blcks - + (ofs_blcks + num_blcks - (raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + - raid5_num_blcks(bdev_io, raid_bdev, es_idx)); + raid5_num_blcks(bdev_io, raid_bdev, es_idx))) * + block_size_b; request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = calloc(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_len, From 646bd383e4f7870f77168630470286359fa46721 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:34:54 +0000 Subject: [PATCH 39/71] feat: Simplify fn of setting req strips buffs for read req. 
--- module/bdev/raid/raid5.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 4bc8a73068f..683b45e021d 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -944,30 +944,17 @@ raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_bl static int raid5_read_req_strips_set_strip_buffs(struct raid5_stripe_request *request) { - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); - struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; - uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); - uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); - uint64_t remaining_len = bdev_io->u.bdev.iovs[0].iov_len; - int iov_idx = 0; - SPDK_ERRLOG("raid5_read_req_strips_set_strip_buffs\n"); - return raid5_set_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx, &iov_idx, &remaining_len); + return raid5_set_all_req_strips_iovs(request); } static void raid5_read_req_strips_free_strip_buffs(struct raid5_stripe_request *request) { - struct raid_bdev_io *raid_io = request->raid_io; - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - struct raid_bdev *raid_bdev = raid_io->raid_bdev; - uint8_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); - uint8_t after_estrip_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); - SPDK_ERRLOG("raid5_read_req_strips_free_strip_buffs\n"); - raid5_free_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx); + raid5_free_all_req_strips_iovs(request); } static int From 1b6a06c8477b033b045cf599ee94642d38f8019e Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:37:15 +0000 Subject: [PATCH 40/71] feat: Add fn to set/free strips buffs for default writing. 
--- module/bdev/raid/raid5.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 683b45e021d..66b877eda48 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -983,6 +983,44 @@ raid5_read_exc_req_strip_free_strip_buffs(struct raid5_stripe_request *request) raid5_free_all_strip_buffs(request, ofs_blcks, num_blcks); } +static int +raid5_write_default_set_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t es_ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); + + SPDK_ERRLOG("raid5_write_default_set_strip_buffs\n"); + + if (sts_idx != es_idx) { + return raid5_set_all_strip_buffs(request, es_ofs_blcks, raid_bdev->strip_size); + } else { + return raid5_set_all_strip_buffs(request, es_ofs_blcks, es_num_blcks); + } +} + +static void +raid5_write_default_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t es_ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); + + SPDK_ERRLOG("raid5_write_default_free_strip_buffs\n"); + + if (sts_idx != es_idx) { + return raid5_free_all_strip_buffs(request, es_ofs_blcks, raid_bdev->strip_size); + } else { + return raid5_free_all_strip_buffs(request, es_ofs_blcks, es_num_blcks); + } +} + static void raid5_stripe_req_complete(struct 
raid5_stripe_request *request) { From cf113b2b39f16a369efff91a32b454958c5d6006 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:39:41 +0000 Subject: [PATCH 41/71] feat: Add fn to set/free strips buffs for writing with broken parity. --- module/bdev/raid/raid5.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 66b877eda48..b180bd21dd3 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1021,6 +1021,22 @@ raid5_write_default_free_strip_buffs(struct raid5_stripe_request *request) } } +static int +raid5_write_broken_parity_set_strip_buffs(struct raid5_stripe_request *request) +{ + SPDK_ERRLOG("raid5_write_broken_parity_set_strip_buffs\n"); + + return raid5_set_all_req_strips_iovs(request); +} + +static void +raid5_write_broken_parity_free_strip_buffs(struct raid5_stripe_request *request) +{ + SPDK_ERRLOG("raid5_write_broken_parity_free_strip_buffs\n"); + + raid5_free_all_req_strips_iovs(request); +} + static void raid5_stripe_req_complete(struct raid5_stripe_request *request) { From a9a17bedec5d343fa536f0debb2c840f7c7db357 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:41:28 +0000 Subject: [PATCH 42/71] feat: Add fn to set/reset/free strips buffs for read modify write case. 
--- module/bdev/raid/raid5.c | 95 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index b180bd21dd3..d49d9477ba0 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1037,6 +1037,101 @@ raid5_write_broken_parity_free_strip_buffs(struct raid5_stripe_request *request) raid5_free_all_req_strips_iovs(request); } +static int +raid5_write_r_modify_w_set_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t sts_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); + uint64_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + int ret = 0; + + ret = raid5_allocate_strips_buffs_until(request, sts_idx, after_sts_idx, sts_num_blcks); + if (ret != 0) { + return ret; + } + + if (sts_idx == es_idx) { + ret = raid5_allocate_strips_buffs_until(request, ps_idx, after_ps_idx, sts_num_blcks); + if (ret != 0) { + raid5_free_strips_buffs_until(request, sts_idx, after_sts_idx); + } + return ret; + } else { + ret = raid5_allocate_strips_buffs_until(request, ps_idx, after_ps_idx, raid_bdev->strip_size); + if (ret != 0) { + raid5_free_strips_buffs_until(request, sts_idx, after_sts_idx); + return ret; + } + } + + ret = raid5_allocate_strips_buffs_until(request, after_sts_idx, es_idx, raid_bdev->strip_size); + if (ret != 0) { + raid5_free_strips_buffs_until(request, sts_idx, after_sts_idx); + 
raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + return ret; + } + + ret = raid5_allocate_strips_buffs_until(request, es_idx, after_es_idx, es_num_blcks); + if (ret != 0) { + raid5_free_strips_buffs_until(request, sts_idx, es_idx); + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + } + return ret; +} + +static void +raid5_w_r_modify_w_reading_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + raid5_free_strips_buffs_until(request, sts_idx, after_es_idx); +} + +static int +raid5_write_r_modify_w_reset_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(raid5_end_strip_idx(bdev_io, raid_bdev), raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + int ret = 0; + + raid5_free_strips_buffs_until(request, sts_idx, after_es_idx); + ret = raid5_set_all_req_strips_iovs(request); + if (ret != 0) { + SPDK_ERRLOG("chop chop\n"); + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + } + return ret; +} + +static void +raid5_w_r_modify_w_writing_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); 
+ struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + raid5_free_all_req_strips_iovs(request); +} + static void raid5_stripe_req_complete(struct raid5_stripe_request *request) { From a8631951ff7ea64a521eb491ea992b786affc775 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:44:34 +0000 Subject: [PATCH 43/71] refactor: Rename cb fn for read request. --- module/bdev/raid/raid5.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index d49d9477ba0..5ab237ba6a4 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1219,10 +1219,10 @@ raid5_read_complete_part_final(struct raid5_stripe_request *request, uint64_t co } } -static void raid5_read_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { +static void raid5_read_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_read_complete_part\n"); + SPDK_ERRLOG("raid5_read_cb\n"); spdk_bdev_free_io(bdev_io); @@ -1270,7 +1270,7 @@ raid5_read_req_strips(struct raid5_stripe_request *request) request->strip_buffs[idx], request->strip_buffs_cnts[idx], raid5_ofs_blcks(bdev_io, raid_bdev, idx), raid5_num_blcks(bdev_io, raid_bdev, idx), - raid5_read_complete_part, + raid5_read_cb, request); if (spdk_unlikely(ret != 0)) { @@ -1343,7 +1343,7 @@ raid5_read_except_one_req_strip(struct raid5_stripe_request *request) ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, request->strip_buffs[idx], request->strip_buffs_cnts[idx], ofs_blcks, num_blcks, - raid5_read_complete_part, + raid5_read_cb, request); if (spdk_unlikely(ret != 0)) { From 4950b151af1bf3e664fc92f51cb992e9276389a1 Mon Sep 17 00:00:00 
2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:46:31 +0000 Subject: [PATCH 44/71] refactor: Rename part completion fn for read req. --- module/bdev/raid/raid5.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 5ab237ba6a4..36c025c3907 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1143,12 +1143,12 @@ raid5_stripe_req_complete(struct raid5_stripe_request *request) } static bool -raid5_read_complete_part_final(struct raid5_stripe_request *request, uint64_t completed, +raid5_read_complete_part(struct raid5_stripe_request *request, uint64_t completed, enum spdk_bdev_io_status status) { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_read_complete_part_final\n"); + SPDK_ERRLOG("raid5_read_complete_part\n"); assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1226,7 +1226,7 @@ static void raid5_read_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_a spdk_bdev_free_io(bdev_io); - raid5_read_complete_part_final(request, 1, success ? + raid5_read_complete_part(request, 1, success ? 
SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED); } @@ -1283,7 +1283,7 @@ raid5_read_req_strips(struct raid5_stripe_request *request) base_bdev_io_not_submitted = ((estrip_idx + raid_bdev->num_base_bdevs) - ststrip_idx) % raid_bdev->num_base_bdevs + 1 - raid_io->base_bdev_io_submitted; - raid5_read_complete_part_final(request, base_bdev_io_not_submitted, + raid5_read_complete_part(request, base_bdev_io_not_submitted, SPDK_BDEV_IO_STATUS_FAILED); return; } @@ -1355,7 +1355,7 @@ raid5_read_except_one_req_strip(struct raid5_stripe_request *request) base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 1 - raid_io->base_bdev_io_submitted; - raid5_read_complete_part_final(request, base_bdev_io_not_submitted, + raid5_read_complete_part(request, base_bdev_io_not_submitted, SPDK_BDEV_IO_STATUS_FAILED); return; } From 31991af9425b53aeb040458e08653c1351796fcd Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:47:48 +0000 Subject: [PATCH 45/71] fix: Delete assert from part completion read request. --- module/bdev/raid/raid5.c | 1 - 1 file changed, 1 deletion(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 36c025c3907..2797c4d66c8 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1207,7 +1207,6 @@ raid5_read_complete_part(struct raid5_stripe_request *request, uint64_t complete } raid5_read_exc_req_strip_free_strip_buffs(request); - assert(0); } else { raid5_read_req_strips_free_strip_buffs(request); } From 7adb8784761609a009fb58c0022748592b337cc1 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:49:51 +0000 Subject: [PATCH 46/71] feat: Add cb and part completion fn for default write req writing part. 
--- module/bdev/raid/raid5.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 2797c4d66c8..477afb8ae1f 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1434,6 +1434,44 @@ raid5_submit_read_request(struct raid5_stripe_request *request) } } +static bool +raid5_w_default_writing_complete_part(struct raid5_stripe_request *request, uint64_t completed, + enum spdk_bdev_io_status status) +{ + struct raid_bdev_io *raid_io = request->raid_io; + + SPDK_ERRLOG("raid5_w_default_writing_complete_part\n"); + + assert(raid_io->base_bdev_io_remaining >= completed); + raid_io->base_bdev_io_remaining -= completed; + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid_io->base_bdev_io_status = status; + } + + if (raid_io->base_bdev_io_remaining == 0) { + raid5_write_default_free_strip_buffs(request); + raid5_stripe_req_complete(request); + + return true; + } else { + return false; + } +} + +static void +raid5_w_default_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + struct raid5_stripe_request *request = cb_arg; + + SPDK_ERRLOG("raid5_w_default_writing_cb\n"); + + spdk_bdev_free_io(bdev_io); + + raid5_w_default_writing_complete_part(request, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); static void From 08430cfc4ed7737451b470f0b28a38c3964323d7 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 02:51:40 +0000 Subject: [PATCH 47/71] feat: Add default write req/writing part. 
---
 module/bdev/raid/raid5.c | 128 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index 477afb8ae1f..c41fa73af9c 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1472,6 +1472,134 @@ raid5_w_default_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_
 					SPDK_BDEV_IO_STATUS_FAILED);
 }
 
+static void raid5_write_default_writing(struct raid5_stripe_request *request);
+
+/* io_wait callback: resume raid5_write_default_writing() once a bdev_io is available. */
+static void
+_raid5_write_default_writing(void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+	raid5_write_default_writing(request);
+}
+
+/*
+ * Writing phase of the default write path: submit the parity strip write, then
+ * one write per strip from the start strip through the end strip.
+ *
+ * Every write covers the same block range: it starts at raid5_ofs_blcks() of
+ * the end strip and spans raid5_num_blcks() of the start strip when the
+ * request touches a single strip, or a whole strip otherwise.
+ *
+ * On -ENOMEM the function queues itself through raid5_queue_io_wait() and
+ * returns; raid_io->base_bdev_io_submitted records how many writes were
+ * already issued so the retry resumes at the right strip. Any other
+ * submission error fails all writes that were not submitted yet.
+ */
+static void
+raid5_write_default_writing(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct raid_base_bdev_info *base_info;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+	uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+	/* Strips from start to end (inclusive, modulo the array width) plus parity. */
+	uint64_t num_strips_to_w = (((es_idx + raid_bdev->num_base_bdevs) -
+				sts_idx) % raid_bdev->num_base_bdevs) + 2;
+	uint64_t base_bdev_io_not_submitted;
+	uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+	uint64_t num_blcks;
+	uint8_t start_idx;
+	int ret = 0;
+
+	if (es_idx == sts_idx) {
+		num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx);
+	} else {
+		num_blcks = raid_bdev->strip_size;
+	}
+
+	/* The parity write is always submission #0. */
+	if (raid_io->base_bdev_io_submitted == 0) {
+		base_info = &raid_bdev->base_bdev_info[ps_idx];
+		base_ch = raid_ch->base_channel[ps_idx];
+
+		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
+						request->strip_buffs[ps_idx],
+						request->strip_buffs_cnts[ps_idx],
+						ofs_blcks, num_blcks,
+						raid5_w_default_writing_cb,
+						request);
+
+		/* Debug-level trace; specifiers match the argument types (PRIu64 / %p). */
+		SPDK_DEBUGLOG(bdev_raid,
+				"par idx: %" PRIu64 "\npar ret: %d\niovs cnt: %d\niovs[0]: %p\n"
+				"ofs: %" PRIu64 "\nnum_blocks: %" PRIu64 "\n",
+				ps_idx,
+				ret,
+				(int)request->strip_buffs_cnts[ps_idx],
+				request->strip_buffs[ps_idx][0].iov_base,
+				ofs_blcks,
+				num_blcks);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_default_writing, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_w - raid_io->base_bdev_io_submitted;
+			raid5_w_default_writing_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+		++raid_io->base_bdev_io_submitted;
+	}
+	/* Skip the strips already written before an -ENOMEM retry (minus the parity write). */
+	start_idx = (sts_idx + raid_io->base_bdev_io_submitted - 1) % raid_bdev->num_base_bdevs;
+
+	for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) {
+		base_info = &raid_bdev->base_bdev_info[idx];
+		base_ch = raid_ch->base_channel[idx];
+
+		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
+						request->strip_buffs[idx], request->strip_buffs_cnts[idx],
+						ofs_blcks, num_blcks,
+						raid5_w_default_writing_cb,
+						request);
+
+		SPDK_DEBUGLOG(bdev_raid,
+				"req idx: %d\nreq ret: %d\niovs cnt: %d\niovs[0]: %p\n"
+				"ofs: %" PRIu64 "\nnum_blocks: %" PRIu64 "\n",
+				idx,
+				ret,
+				(int)request->strip_buffs_cnts[idx],
+				request->strip_buffs[idx][0].iov_base,
+				ofs_blcks,
+				num_blcks);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_default_writing, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_w - raid_io->base_bdev_io_submitted;
+			raid5_w_default_writing_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+
+		++raid_io->base_bdev_io_submitted;
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io
*raid_io);
 
 static void
From 617226cdd1749654d3cc66f5f3fd825da7d8d3c7 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 02:53:28 +0000
Subject: [PATCH 48/71] feat: Add cb and part completion fn for default write req reading part.
---
 module/bdev/raid/raid5.c | 107 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index c41fa73af9c..d9b3a99dc29 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1577,6 +1577,113 @@ raid5_write_default_writing(struct raid5_stripe_request *request)
 	}
 }
 
+/*
+ * Account for `completed` finished reads of the default write path's reading
+ * phase and, once none are outstanding, compute parity and start writing.
+ *
+ * Any non-success status is latched into raid_io->base_bdev_io_status. When
+ * base_bdev_io_remaining reaches zero:
+ *  - on failure, the strip buffers are released and the request is completed;
+ *  - otherwise every non-parity strip buffer is XORed into the parity strip
+ *    buffer, the submission counters are re-armed for the writes and
+ *    raid5_write_default_writing() is called.
+ *
+ * Returns true when the reading phase is over (the request was either
+ * completed or handed to the writing phase), false while reads are pending.
+ */
+static bool
+raid5_w_default_reading_complete_part(struct raid5_stripe_request *request, uint64_t completed,
+				enum spdk_bdev_io_status status)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+
+	/* Per-I/O trace: debug level, so the error log is not flooded on every completion. */
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_default_reading_complete_part\n");
+
+	assert(raid_io->base_bdev_io_remaining >= completed);
+	raid_io->base_bdev_io_remaining -= completed;
+
+	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+		raid_io->base_bdev_io_status = status;
+	}
+
+	if (raid_io->base_bdev_io_remaining == 0) {
+		struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+		struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+		/* 8-byte words per block, assuming strip_size is the strip length in blocks. */
+		uint64_t block_size_b8 = ((uint64_t)128 * raid_bdev->strip_size_kb) /
+					raid_bdev->strip_size;
+		uint8_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+		uint8_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+		uint8_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+		uint8_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev);
+		uint64_t num_b8;
+
+		if (raid_io->base_bdev_io_status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+			raid5_write_default_free_strip_buffs(request);
+			raid5_stripe_req_complete(request);
+			return true;
+		}
+
+		if (sts_idx != es_idx) {
+			num_b8 = raid_bdev->strip_size * block_size_b8;
+		} else {
+			num_b8 = raid5_num_blcks(bdev_io, raid_bdev, sts_idx) * block_size_b8;
+		}
+
+		/* XOR all strips but parity into the parity buffer: start strip up to parity... */
+		for (uint8_t i = sts_idx; i != ps_idx; i = raid5_next_idx(i, raid_bdev)) {
+			raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx],
+						request->strip_buffs_cnts[ps_idx], 0,
+						request->strip_buffs[i], request->strip_buffs_cnts[i], 0,
+						num_b8);
+		}
+
+		/* ...then the strips after parity, wrapping around to the start strip. */
+		for (uint8_t i = after_ps_idx; i != sts_idx; i = raid5_next_idx(i, raid_bdev)) {
+			raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx],
+						request->strip_buffs_cnts[ps_idx], 0,
+						request->strip_buffs[i], request->strip_buffs_cnts[i], 0,
+						num_b8);
+		}
+
+		/* Re-arm the counters for the writing phase: requested strips plus parity. */
+		raid_io->base_bdev_io_submitted = 0;
+		raid_io->base_bdev_io_remaining = (((es_idx + raid_bdev->num_base_bdevs) -
+						sts_idx) % raid_bdev->num_base_bdevs) + 2;
+
+		/* Debug aid: dump the first 8 bytes of every strip buffer (debug log only). */
+		for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; ++i) {
+			SPDK_DEBUGLOG(bdev_raid, "%d strip, first 8 byte: %" PRIu64 "\n", i,
+					*(uint64_t *)request->strip_buffs[i][0].iov_base);
+		}
+
+		raid5_write_default_writing(request);
+
+		return true;
+	} else {
+		return false;
+	}
+}
+
+/*
+ * Base bdev read completion callback of the default write path: frees the
+ * base bdev I/O and accounts for one completed read.
+ */
+static void
+raid5_w_default_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_default_reading_cb\n");
+
+	spdk_bdev_free_io(bdev_io);
+
+	raid5_w_default_reading_complete_part(request, 1, success ?
+					SPDK_BDEV_IO_STATUS_SUCCESS :
+					SPDK_BDEV_IO_STATUS_FAILED);
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From c6edf68675ffe3938408d944d6e99173497fcc67 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 02:54:40 +0000
Subject: [PATCH 49/71] feat: Add default write req/reading part.
---
 module/bdev/raid/raid5.c | 175 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index d9b3a99dc29..d540c5f5111 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1659,6 +1659,181 @@ raid5_w_default_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_
 					SPDK_BDEV_IO_STATUS_FAILED);
 }
 
+static void raid5_write_default_reading(struct raid5_stripe_request *request);
+
+/* io_wait callback: resume raid5_write_default_reading() once a bdev_io is available. */
+static void
+_raid5_write_default_reading(void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+	raid5_write_default_reading(request);
+}
+
+/*
+ * Reading phase of the default write path: submit the reads whose completion
+ * (raid5_w_default_reading_cb()) leads to the parity computation.
+ *
+ * Request spanning several strips (start strip != end strip):
+ *  - step 0 reads blocks [es_ofs, sts_ofs) of the start strip into its first
+ *    iovec, when sts_ofs > es_ofs;
+ *  - step 1 reads the strip_size - es_num_blcks blocks that follow the
+ *    request's range in the end strip into its last iovec, when non-empty;
+ *  - then a whole strip is read from every strip between the end strip and
+ *    the start strip (exclusive, wrapping around), parity excluded.
+ * A step with nothing to read is accounted as an immediately successful read.
+ *
+ * Request inside one strip: the request's block range is read from every
+ * strip except the requested one and parity.
+ *
+ * On -ENOMEM the function queues itself through raid5_queue_io_wait() and
+ * returns; raid_io->base_bdev_io_submitted tells the retry where to resume.
+ * Any other submission error fails all reads that were not submitted yet.
+ */
+static void
+raid5_write_default_reading(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct raid_base_bdev_info *base_info;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t sts_ofs = raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_ofs = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+	uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx);
+	uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+	uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+	uint64_t num_strips_to_r;
+	uint64_t base_bdev_io_not_submitted;
+	uint64_t strips_to_skip;
+	uint64_t ofs_blcks;
+	uint64_t num_blcks;
+	uint8_t start_idx;
+	int ret = 0;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_write_default_reading\n");
+
+	if (sts_idx != es_idx) {
+		num_strips_to_r = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) -
+					sts_idx) % raid_bdev->num_base_bdevs);
+		if (raid_io->base_bdev_io_submitted == 0) {
+			if (sts_ofs > es_ofs) {
+				base_info = &raid_bdev->base_bdev_info[sts_idx];
+				base_ch = raid_ch->base_channel[sts_idx];
+
+				ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+								&request->strip_buffs[sts_idx][0], 1,
+								es_ofs, sts_ofs - es_ofs,
+								raid5_w_default_reading_cb,
+								request);
+				if (spdk_unlikely(ret != 0)) {
+					if (spdk_unlikely(ret == -ENOMEM)) {
+						raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+								base_ch, _raid5_write_default_reading, request);
+						return;
+					}
+
+					base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted;
+					raid5_w_default_reading_complete_part(request, base_bdev_io_not_submitted,
+									SPDK_BDEV_IO_STATUS_FAILED);
+					return;
+				}
+			} else if (raid5_w_default_reading_complete_part(request, 1,
+					SPDK_BDEV_IO_STATUS_SUCCESS)) {
+				/* The reading phase just ended; its state must not be touched anymore. */
+				return;
+			}
+			++raid_io->base_bdev_io_submitted;
+		}
+		if (raid_io->base_bdev_io_submitted == 1) {
+			if (raid_bdev->strip_size > es_num_blcks) {
+				base_info = &raid_bdev->base_bdev_info[es_idx];
+				base_ch = raid_ch->base_channel[es_idx];
+
+				ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+								&request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1], 1,
+								es_ofs + es_num_blcks, raid_bdev->strip_size - es_num_blcks,
+								raid5_w_default_reading_cb,
+								request);
+				if (spdk_unlikely(ret != 0)) {
+					if (spdk_unlikely(ret == -ENOMEM)) {
+						raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+								base_ch, _raid5_write_default_reading, request);
+						return;
+					}
+
+					base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted;
+					raid5_w_default_reading_complete_part(request, base_bdev_io_not_submitted,
+									SPDK_BDEV_IO_STATUS_FAILED);
+					return;
+				}
+			} else if (raid5_w_default_reading_complete_part(request, 1,
+					SPDK_BDEV_IO_STATUS_SUCCESS)) {
+				/*
+				 * Nothing was left to read at all: the completion above already
+				 * reset the counters and started the writing phase (or completed
+				 * the request), so continuing here would corrupt that state.
+				 */
+				return;
+			}
+			++raid_io->base_bdev_io_submitted;
+		}
+		strips_to_skip = raid_io->base_bdev_io_submitted - 2;
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+		num_blcks = raid_bdev->strip_size;
+	} else {
+		num_strips_to_r = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) -
+					sts_idx) % raid_bdev->num_base_bdevs) - 2;
+		strips_to_skip = raid_io->base_bdev_io_submitted;
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+		num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx);
+	}
+
+	/*
+	 * Find the strip to resume from. The parity strip is never read and never
+	 * counted in base_bdev_io_submitted, so it is stepped over separately;
+	 * otherwise an -ENOMEM retry past parity would re-read a strip.
+	 */
+	start_idx = after_es_idx;
+	while (start_idx != sts_idx && (start_idx == ps_idx || strips_to_skip > 0)) {
+		if (start_idx != ps_idx) {
+			--strips_to_skip;
+		}
+		start_idx = raid5_next_idx(start_idx, raid_bdev);
+	}
+
+	for (uint8_t idx = start_idx; idx != sts_idx; idx = raid5_next_idx(idx, raid_bdev)) {
+		if (idx == ps_idx) {
+			continue;
+		}
+		base_info = &raid_bdev->base_bdev_info[idx];
+		base_ch = raid_ch->base_channel[idx];
+
+		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+						request->strip_buffs[idx], request->strip_buffs_cnts[idx],
+						ofs_blcks, num_blcks,
+						raid5_w_default_reading_cb,
+						request);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_default_reading, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted;
+			raid5_w_default_reading_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+
+		++raid_io->base_bdev_io_submitted;
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From 290813f7115d464b79192acc9e8c4dd04fde4a84 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 02:55:58 +0000
Subject: [PATCH 50/71] feat: Add cb and part completion fns for write req with broken parity strip.
---
 module/bdev/raid/raid5.c | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index d540c5f5111..c0fc0e0803c 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1793,6 +1793,60 @@ raid5_write_default_reading(struct raid5_stripe_request *request)
 	}
 }
 
+/*
+ * Account for `completed` finished base bdev writes of the broken-parity
+ * write path.
+ *
+ * Any non-success status is latched into raid_io->base_bdev_io_status. Once
+ * base_bdev_io_remaining reaches zero, the strip buffers are released via
+ * raid5_write_broken_parity_free_strip_buffs() and the request is completed.
+ *
+ * Returns true when this call finished the stripe request, false otherwise.
+ */
+static bool
+raid5_w_broken_ps_complete_part(struct raid5_stripe_request *request, uint64_t completed,
+				enum spdk_bdev_io_status status)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+
+	/* Per-I/O trace: debug level, so the error log is not flooded on every completion. */
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_broken_ps_complete_part\n");
+
+	assert(raid_io->base_bdev_io_remaining >= completed);
+	raid_io->base_bdev_io_remaining -= completed;
+
+	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+		raid_io->base_bdev_io_status = status;
+	}
+
+	if (raid_io->base_bdev_io_remaining != 0) {
+		return false;
+	}
+
+	raid5_write_broken_parity_free_strip_buffs(request);
+	raid5_stripe_req_complete(request);
+
+	return true;
+}
+
+/*
+ * Base bdev write completion callback of the broken-parity write path: frees
+ * the base bdev I/O and accounts for one completed write.
+ */
+static void
+raid5_w_broken_ps_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_broken_ps_cb\n");
+
+	spdk_bdev_free_io(bdev_io);
+
+	raid5_w_broken_ps_complete_part(request, 1, success ?
+					SPDK_BDEV_IO_STATUS_SUCCESS :
+					SPDK_BDEV_IO_STATUS_FAILED);
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From 099910b28d89f7463c083144e881522678a49a1e Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 02:57:31 +0000
Subject: [PATCH 51/71] feat: Add fn to write req with broken parity strip.
---
 module/bdev/raid/raid5.c | 85 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index c0fc0e0803c..106d70a5608 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1831,6 +1831,91 @@ raid5_w_broken_ps_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) {
 					SPDK_BDEV_IO_STATUS_FAILED);
 }
 
+static void raid5_write_broken_parity_strip(struct raid5_stripe_request *request);
+
+/* io_wait callback: resume raid5_write_broken_parity_strip() once a bdev_io is available. */
+static void
+_raid5_write_broken_parity_strip(void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+	raid5_write_broken_parity_strip(request);
+}
+
+/*
+ * Broken-parity write path: submit one write per strip from the start strip
+ * through the end strip, each with its own raid5_ofs_blcks()/raid5_num_blcks()
+ * range. No separate parity write is issued.
+ *
+ * On -ENOMEM the function queues itself through raid5_queue_io_wait() and
+ * returns; raid_io->base_bdev_io_submitted tells the retry where to resume.
+ * Any other submission error fails all writes that were not submitted yet.
+ */
+static void
+raid5_write_broken_parity_strip(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct raid_base_bdev_info *base_info;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+	uint64_t base_bdev_io_not_submitted;
+	uint64_t ofs_blcks;
+	uint64_t num_blcks;
+	uint8_t start_idx;
+	int ret = 0;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_write_broken_parity_strip\n");
+
+	/* Skip the strips already written before an -ENOMEM retry. */
+	start_idx = (sts_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;
+
+	for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) {
+		base_info = &raid_bdev->base_bdev_info[idx];
+		base_ch = raid_ch->base_channel[idx];
+
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, idx);
+		num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx);
+
+		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
+						request->strip_buffs[idx], request->strip_buffs_cnts[idx],
+						ofs_blcks, num_blcks,
+						raid5_w_broken_ps_cb,
+						request);
+
+		/* Debug-level trace; specifiers match the argument types (PRIu64 / %p). */
+		SPDK_DEBUGLOG(bdev_raid,
+				"req idx: %d\nreq ret: %d\niovs cnt: %d\niovs[0]: %p\n"
+				"ofs: %" PRIu64 "\nnum_blocks: %" PRIu64 "\n",
+				idx,
+				ret,
+				(int)request->strip_buffs_cnts[idx],
+				request->strip_buffs[idx][0].iov_base,
+				ofs_blcks,
+				num_blcks);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_broken_parity_strip, request);
+				return;
+			}
+
+			/* Strips from start to end (inclusive) minus those already submitted. */
+			base_bdev_io_not_submitted = ((es_idx + raid_bdev->num_base_bdevs) -
+						sts_idx) % raid_bdev->num_base_bdevs + 1 - raid_io->base_bdev_io_submitted;
+			raid5_w_broken_ps_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+
+		++raid_io->base_bdev_io_submitted;
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From fbfd5c1b17fa87c90c7c5f1c32613088b901d395 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 02:59:13 +0000
Subject: [PATCH 52/71] feat: Add cb and part completion fns for read-modify-write writing part.
---
 module/bdev/raid/raid5.c | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index 106d70a5608..b333d5e4411 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1901,6 +1901,60 @@ raid5_write_broken_parity_strip(struct raid5_stripe_request *request)
 	}
 }
 
+/*
+ * Account for `completed` finished base bdev writes of the read-modify-write
+ * path's writing phase.
+ *
+ * Any non-success status is latched into raid_io->base_bdev_io_status. Once
+ * base_bdev_io_remaining reaches zero, the strip buffers are released via
+ * raid5_w_r_modify_w_writing_free_strip_buffs() and the request is completed.
+ *
+ * Returns true when this call finished the stripe request, false otherwise.
+ */
+static bool
+raid5_w_r_modify_w_writing_complete_part(struct raid5_stripe_request *request, uint64_t completed,
+				enum spdk_bdev_io_status status)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+
+	/* Per-I/O trace: debug level, so the error log is not flooded on every completion. */
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_r_modify_w_writing_complete_part\n");
+
+	assert(raid_io->base_bdev_io_remaining >= completed);
+	raid_io->base_bdev_io_remaining -= completed;
+
+	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+		raid_io->base_bdev_io_status = status;
+	}
+
+	if (raid_io->base_bdev_io_remaining != 0) {
+		return false;
+	}
+
+	raid5_w_r_modify_w_writing_free_strip_buffs(request);
+	raid5_stripe_req_complete(request);
+
+	return true;
+}
+
+/*
+ * Base bdev write completion callback of the read-modify-write path: frees
+ * the base bdev I/O and accounts for one completed write.
+ */
+static void
+raid5_w_r_modify_w_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_r_modify_w_writing_cb\n");
+
+	spdk_bdev_free_io(bdev_io);
+
+	raid5_w_r_modify_w_writing_complete_part(request, 1, success ?
+					SPDK_BDEV_IO_STATUS_SUCCESS :
+					SPDK_BDEV_IO_STATUS_FAILED);
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From faa0b22361370c75b6b2835099997515acb0d340 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 03:01:23 +0000
Subject: [PATCH 53/71] feat: Add fn for write req/read-modify-write case/writing part.
---
 module/bdev/raid/raid5.c | 112 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index b333d5e4411..66b8674be98 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -1939,6 +1939,118 @@ raid5_w_r_modify_w_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *
 					SPDK_BDEV_IO_STATUS_FAILED);
 }
 
+static void raid5_write_r_modify_w_writing(struct raid5_stripe_request *request);
+
+/* io_wait callback: resume raid5_write_r_modify_w_writing() once a bdev_io is available. */
+static void
+_raid5_write_r_modify_w_writing(void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+	raid5_write_r_modify_w_writing(request);
+}
+
+/*
+ * Writing phase of the read-modify-write path: submit the parity strip write,
+ * then one write per strip from the start strip through the end strip, each
+ * with its own raid5_ofs_blcks()/raid5_num_blcks() range.
+ *
+ * The parity write starts at raid5_ofs_blcks() of the end strip and spans
+ * raid5_num_blcks() of the start strip for a single-strip request, or a whole
+ * strip otherwise.
+ *
+ * On -ENOMEM the function queues itself through raid5_queue_io_wait() and
+ * returns; raid_io->base_bdev_io_submitted tells the retry where to resume.
+ * Any other submission error fails all writes that were not submitted yet.
+ */
+static void
+raid5_write_r_modify_w_writing(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct raid_base_bdev_info *base_info;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+	uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+	/* Strips from start to end (inclusive, modulo the array width) plus parity. */
+	uint64_t num_strips_to_w = ((es_idx + raid_bdev->num_base_bdevs) -
+				sts_idx) % raid_bdev->num_base_bdevs + 2;
+	uint64_t base_bdev_io_not_submitted;
+	uint64_t ofs_blcks;
+	uint64_t num_blcks;
+	uint8_t start_idx;
+	int ret = 0;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_write_r_modify_w_writing\n");
+
+	/* The parity write is always submission #0. */
+	if (raid_io->base_bdev_io_submitted == 0) {
+		base_info = &raid_bdev->base_bdev_info[ps_idx];
+		base_ch = raid_ch->base_channel[ps_idx];
+
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+		if (es_idx == sts_idx) {
+			num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx);
+		} else {
+			num_blcks = raid_bdev->strip_size;
+		}
+
+		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
+						request->strip_buffs[ps_idx], request->strip_buffs_cnts[ps_idx],
+						ofs_blcks, num_blcks,
+						raid5_w_r_modify_w_writing_cb,
+						request);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_r_modify_w_writing, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_w - raid_io->base_bdev_io_submitted;
+			raid5_w_r_modify_w_writing_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+		++raid_io->base_bdev_io_submitted;
+	}
+	/* Skip the strips already written before an -ENOMEM retry (minus the parity write). */
+	start_idx = (sts_idx + raid_io->base_bdev_io_submitted - 1) % raid_bdev->num_base_bdevs;
+
+	for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) {
+		base_info = &raid_bdev->base_bdev_info[idx];
+		base_ch = raid_ch->base_channel[idx];
+
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, idx);
+		num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx);
+
+		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
+						request->strip_buffs[idx], request->strip_buffs_cnts[idx],
+						ofs_blcks, num_blcks,
+						raid5_w_r_modify_w_writing_cb,
+						request);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_r_modify_w_writing, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_w - raid_io->base_bdev_io_submitted;
+			raid5_w_r_modify_w_writing_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+
+		++raid_io->base_bdev_io_submitted;
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From 3bb2b10010724fff06ac001700806e904744149f Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 03:02:29 +0000
Subject: [PATCH 54/71] feat: Add cb and part completion fns for read-modify-write reading part.
---
 module/bdev/raid/raid5.c | 119 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index 66b8674be98..3578dedf1fe 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -2034,6 +2034,125 @@ raid5_write_r_modify_w_writing(struct raid5_stripe_request *request)
 	}
 }
 
+/*
+ * Account for `completed` finished reads of the read-modify-write path's
+ * reading phase and, once none are outstanding, update parity and start
+ * writing.
+ *
+ * Any non-success status is latched into raid_io->base_bdev_io_status. When
+ * base_bdev_io_remaining reaches zero:
+ *  - on failure, raid5_w_r_modify_w_reading_free_strip_buffs() is called and
+ *    the request is completed;
+ *  - otherwise the buffers of the strips from start to end are XORed into the
+ *    parity buffer, raid5_write_r_modify_w_reset_strip_buffs() is called, the
+ *    same strips' buffers are XORed into parity a second time, the
+ *    submission counters are re-armed and raid5_write_r_modify_w_writing()
+ *    is called.
+ *
+ * Returns true when the reading phase is over (the request was either
+ * completed or handed to the writing phase), false while reads are pending.
+ */
+static bool
+raid5_w_r_modify_w_reading_complete_part(struct raid5_stripe_request *request, uint64_t completed,
+				enum spdk_bdev_io_status status)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+
+	/* Per-I/O trace: debug level, so the error log is not flooded on every completion. */
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_r_modify_w_reading_complete_part\n");
+
+	assert(raid_io->base_bdev_io_remaining >= completed);
+	raid_io->base_bdev_io_remaining -= completed;
+
+	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+		raid_io->base_bdev_io_status = status;
+	}
+
+	if (raid_io->base_bdev_io_remaining == 0) {
+		struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+		struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+		/* 8-byte words per block, assuming strip_size is the strip length in blocks. */
+		uint64_t block_size_b8 = ((uint64_t)128 * raid_bdev->strip_size_kb) /
+					raid_bdev->strip_size;
+		uint8_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+		uint8_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+		uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+		uint8_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+		uint64_t ofs_b8;
+		uint64_t num_b8;
+		int ret = 0;
+
+		if (raid_io->base_bdev_io_status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+			raid5_w_r_modify_w_reading_free_strip_buffs(request);
+			raid5_stripe_req_complete(request);
+			return true;
+		}
+
+		/* First pass: XOR the strip buffers, as filled by the reads, into parity. */
+		for (uint8_t i = sts_idx; i != after_es_idx; i = raid5_next_idx(i, raid_bdev)) {
+			ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, i) -
+				raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8;
+			num_b8 = raid5_num_blcks(bdev_io, raid_bdev, i) * block_size_b8;
+			raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx],
+						request->strip_buffs_cnts[ps_idx], ofs_b8,
+						request->strip_buffs[i], request->strip_buffs_cnts[i], 0,
+						num_b8);
+		}
+
+		ret = raid5_write_r_modify_w_reset_strip_buffs(request);
+		if (spdk_unlikely(ret != 0)) {
+			/*
+			 * NOTE(review): no free_strip_buffs call on this path; presumably the
+			 * reset helper cleans up after itself on failure - confirm.
+			 */
+			SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n");
+			raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+			raid5_stripe_req_complete(request);
+			return true;
+		}
+
+		/* Second pass: XOR the strip buffers into parity again after the reset above. */
+		for (uint8_t i = sts_idx; i != after_es_idx; i = raid5_next_idx(i, raid_bdev)) {
+			ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, i) -
+				raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8;
+			num_b8 = raid5_num_blcks(bdev_io, raid_bdev, i) * block_size_b8;
+			raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx],
+						request->strip_buffs_cnts[ps_idx], ofs_b8,
+						request->strip_buffs[i], request->strip_buffs_cnts[i], 0,
+						num_b8);
+		}
+
+		/* Re-arm the counters for the writing phase: strips start..end plus parity. */
+		raid_io->base_bdev_io_submitted = 0;
+		raid_io->base_bdev_io_remaining = (((es_idx + raid_bdev->num_base_bdevs) -
+						sts_idx) % raid_bdev->num_base_bdevs) + 2;
+
+		raid5_write_r_modify_w_writing(request);
+
+		return true;
+	} else {
+		return false;
+	}
+}
+
+/*
+ * Base bdev read completion callback of the read-modify-write path: frees the
+ * base bdev I/O and accounts for one completed read.
+ */
+static void
+raid5_w_r_modify_w_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_w_r_modify_w_reading_cb\n");
+
+	spdk_bdev_free_io(bdev_io);
+
+	raid5_w_r_modify_w_reading_complete_part(request, 1, success ?
+					SPDK_BDEV_IO_STATUS_SUCCESS :
+					SPDK_BDEV_IO_STATUS_FAILED);
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From 463fc6d597f56e6f83684294baf699ff586b7186 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 03:04:01 +0000
Subject: [PATCH 55/71] feat: Add fn for write req/read-modify-write case/reading part.
---
 module/bdev/raid/raid5.c | 112 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index 3578dedf1fe..92cf7f15a43 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -2121,6 +2121,118 @@ raid5_w_r_modify_w_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *
 					SPDK_BDEV_IO_STATUS_FAILED);
 }
 
+static void raid5_write_r_modify_w_reading(struct raid5_stripe_request *request);
+
+/* io_wait callback: resume raid5_write_r_modify_w_reading() once a bdev_io is available. */
+static void
+_raid5_write_r_modify_w_reading(void *cb_arg)
+{
+	struct raid5_stripe_request *request = cb_arg;
+	raid5_write_r_modify_w_reading(request);
+}
+
+/*
+ * Reading phase of the read-modify-write path: submit a read of the parity
+ * strip, then one read per strip from the start strip through the end strip,
+ * each with its own raid5_ofs_blcks()/raid5_num_blcks() range.
+ *
+ * The parity read starts at raid5_ofs_blcks() of the end strip and spans
+ * raid5_num_blcks() of the start strip for a single-strip request, or a whole
+ * strip otherwise.
+ *
+ * On -ENOMEM the function queues itself through raid5_queue_io_wait() and
+ * returns; raid_io->base_bdev_io_submitted tells the retry where to resume.
+ * Any other submission error fails all reads that were not submitted yet.
+ */
+static void
+raid5_write_r_modify_w_reading(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct raid_base_bdev_info *base_info;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev);
+	uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+	/* Strips from start to end (inclusive, modulo the array width) plus parity. */
+	uint64_t num_strips_to_r = ((es_idx + raid_bdev->num_base_bdevs) -
+				sts_idx) % raid_bdev->num_base_bdevs + 2;
+	uint64_t base_bdev_io_not_submitted;
+	uint64_t ofs_blcks;
+	uint64_t num_blcks;
+	uint8_t start_idx;
+	int ret = 0;
+
+	SPDK_DEBUGLOG(bdev_raid, "raid5_write_r_modify_w_reading\n");
+
+	/* The parity read is always submission #0. */
+	if (raid_io->base_bdev_io_submitted == 0) {
+		base_info = &raid_bdev->base_bdev_info[ps_idx];
+		base_ch = raid_ch->base_channel[ps_idx];
+
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx);
+		if (es_idx == sts_idx) {
+			num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx);
+		} else {
+			num_blcks = raid_bdev->strip_size;
+		}
+
+		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+						request->strip_buffs[ps_idx], request->strip_buffs_cnts[ps_idx],
+						ofs_blcks, num_blcks,
+						raid5_w_r_modify_w_reading_cb,
+						request);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_r_modify_w_reading, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted;
+			raid5_w_r_modify_w_reading_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+		++raid_io->base_bdev_io_submitted;
+	}
+	/* Skip the strips already read before an -ENOMEM retry (minus the parity read). */
+	start_idx = (sts_idx + raid_io->base_bdev_io_submitted - 1) % raid_bdev->num_base_bdevs;
+
+	for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) {
+		base_info = &raid_bdev->base_bdev_info[idx];
+		base_ch = raid_ch->base_channel[idx];
+
+		ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, idx);
+		num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx);
+
+		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+						request->strip_buffs[idx], request->strip_buffs_cnts[idx],
+						ofs_blcks, num_blcks,
+						raid5_w_r_modify_w_reading_cb,
+						request);
+
+		if (spdk_unlikely(ret != 0)) {
+			if (spdk_unlikely(ret == -ENOMEM)) {
+				raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
+						base_ch, _raid5_write_r_modify_w_reading, request);
+				return;
+			}
+
+			base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted;
+			raid5_w_r_modify_w_reading_complete_part(request, base_bdev_io_not_submitted,
+							SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+
+		++raid_io->base_bdev_io_submitted;
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From a5912d41a2c1042f2a914f997e5684f1f84f6a93 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 03:05:28 +0000
Subject: [PATCH 56/71] feat: Add fn for submitting write request.
---
 module/bdev/raid/raid5.c | 109 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c
index 92cf7f15a43..53d25e90246 100644
--- a/module/bdev/raid/raid5.c
+++ b/module/bdev/raid/raid5.c
@@ -2216,6 +2216,115 @@ raid5_write_r_modify_w_reading(struct raid5_stripe_request *request)
 	}
 }
 
+/*
+ * Entry point of the RAID5 write path: choose and start the write strategy
+ * for one stripe request.
+ *
+ * A base bdev whose I/O channel is NULL counts as broken; its index is stored
+ * in request->broken_strip_idx, where raid_bdev->num_base_bdevs is treated as
+ * "no broken strip". Strategies, in order of evaluation:
+ *  - no broken strip and write type DEFAULT: raid5_write_default_reading();
+ *  - broken strip is the parity strip: raid5_write_broken_parity_strip();
+ *  - no broken strip, or a broken strip for which raid5_is_req_strip() is
+ *    false: raid5_write_r_modify_w_reading();
+ *  - otherwise (the broken strip is part of the request), or when two strips
+ *    are broken: the request is completed with a failed status.
+ */
+static void
+raid5_submit_write_request(struct raid5_stripe_request *request)
+{
+	struct raid_bdev_io *raid_io = request->raid_io;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	struct raid5_info *r5_info = raid_bdev->module_private;
+	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
+	struct spdk_io_channel *base_ch;
+	uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev);
+	uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev);
+	uint8_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev));
+	int ret = 0;
+
+	for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) {
+		base_ch = raid_ch->base_channel[idx];
+
+		if (base_ch == NULL) {
+			if (request->broken_strip_idx == raid_bdev->num_base_bdevs) {
+				request->broken_strip_idx = idx;
+			} else {
+				/*
+				 * A second failed base bdev is a runtime condition, not a
+				 * programming error: fail the I/O instead of asserting.
+				 */
+				SPDK_ERRLOG("RAID5 write request: 2 broken strips\n");
+				raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+				raid5_stripe_req_complete(request);
+				return;
+			}
+		}
+	}
+
+	if (request->broken_strip_idx == raid_bdev->num_base_bdevs &&
+			r5_info->write_type == DEFAULT) {
+		/* default */
+
+		ret = raid5_write_default_set_strip_buffs(request);
+		if (spdk_unlikely(ret != 0)) {
+			SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n");
+			raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+			raid5_stripe_req_complete(request);
+			return;
+		}
+
+		/* Reads to wait for; must match the accounting in raid5_write_default_reading(). */
+		raid_io->base_bdev_io_submitted = 0;
+		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) -
+							sts_idx) % raid_bdev->num_base_bdevs) - 2;
+		if (sts_idx != es_idx) {
+			raid_io->base_bdev_io_remaining += 2;
+		}
+		raid5_write_default_reading(request);
+	} else if (request->broken_strip_idx == ps_idx) {
+		/* broken parity strip */
+
+		ret = raid5_write_broken_parity_set_strip_buffs(request);
+		if (spdk_unlikely(ret != 0)) {
+			SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n");
+			raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+			raid5_stripe_req_complete(request);
+			return;
+		}
+
+		raid_io->base_bdev_io_submitted = 0;
+		raid_io->base_bdev_io_remaining = ((es_idx + raid_bdev->num_base_bdevs) -
+						sts_idx) % raid_bdev->num_base_bdevs + 1;
+		raid5_write_broken_parity_strip(request);
+	} else if (request->broken_strip_idx == raid_bdev->num_base_bdevs ||
+			!raid5_is_req_strip(sts_idx, es_idx, request->broken_strip_idx)) {
+		/* read-modify-write */
+
+		ret = raid5_write_r_modify_w_set_strip_buffs(request);
+		if (spdk_unlikely(ret != 0)) {
+			SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n");
+			raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+			raid5_stripe_req_complete(request);
+			return;
+		}
+
+		raid_io->base_bdev_io_submitted = 0;
+		raid_io->base_bdev_io_remaining = ((es_idx + raid_bdev->num_base_bdevs) -
+						sts_idx) % raid_bdev->num_base_bdevs + 2;
+		raid5_write_r_modify_w_reading(request);
+	} else {
+		/*
+		 * broken req strip: not supported yet. Fail the request explicitly;
+		 * falling through would leave it without a completion forever.
+		 */
+		SPDK_ERRLOG("RAID5 write request: write to a broken strip is not supported\n");
+		raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
+		raid5_stripe_req_complete(request);
+	}
+}
+
 static void raid5_submit_rw_request(struct raid_bdev_io *raid_io);
 
 static void
From 7cbae49080f8b647e03d52a9bcd7c035f4884728 Mon Sep 17 00:00:00 2001
From: Arsene-Baitenov
Date: Fri, 15 Dec 2023 03:11:50 +0000
Subject: [PATCH 57/71] refactor: Delete old version of fn for submitting write request.
--- module/bdev/raid/raid5.c | 102 --------------------------------------- 1 file changed, 102 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 53d25e90246..255dccfab82 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2624,108 +2624,6 @@ raid5_submit_write_request_writing(struct raid5_io_buffer *io_buffer) } } -static void -raid5_submit_write_request(struct raid_bdev_io *raid_io) -{ - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - struct spdk_bdev_ext_io_opts io_opts = {}; - struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; - struct raid_bdev *raid_bdev = raid_io->raid_bdev; - uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; - uint8_t broken_bdev_idx = raid_bdev->num_base_bdevs; - uint64_t stripe_index; - uint64_t parity_strip_idx; - uint64_t req_bdev_idx; - uint32_t offset_in_strip; - uint64_t offset_blocks; - uint64_t num_blocks; - int ret = 0; - uint64_t start_strip_idx; - uint64_t end_strip_idx; - struct raid_base_bdev_info *base_info; - struct spdk_io_channel *base_ch; - - start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - end_strip_idx = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> - raid_bdev->strip_size_shift; - if (start_strip_idx != end_strip_idx) { - SPDK_ERRLOG("I/O spans strip boundary!\n"); - assert(false); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - return; - } - - assert(raid_ch != NULL); - assert(raid_ch->base_channel); - - io_opts.size = sizeof(io_opts); - io_opts.memory_domain = bdev_io->u.bdev.memory_domain; - io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; - io_opts.metadata = bdev_io->u.bdev.md_buf; - - stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); - parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); - offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - - req_bdev_idx 
= start_strip_idx % (raid_bdev->num_base_bdevs - 1); - if (req_bdev_idx >= parity_strip_idx) { - ++req_bdev_idx; - } - offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; - num_blocks = bdev_io->u.bdev.num_blocks; - - // calculating of broken strip idx - for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { - if (raid_ch->base_channel[idx] == NULL) { - if (broken_bdev_idx == raid_bdev->num_base_bdevs) { - broken_bdev_idx = idx; - } else { - SPDK_ERRLOG("2 broken strips\n"); - assert(false); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - return; - } - } - } - - if (broken_bdev_idx == parity_strip_idx) { - // case: broken parity strip - - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, num_blocks, raid5_bdev_io_completion, - raid_io, &io_opts); - - if (ret == -ENOMEM) { - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - } else if (ret != 0) { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - } - } else { - // cases with parity recalculating - - struct raid5_io_buffer *io_buffer; - - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - - io_buffer = raid5_get_io_buffer(raid_io, num_blocks * block_size_b); - if (io_buffer == NULL) { - raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_rw_request); - return; - } - - raid5_submit_write_request_reading(io_buffer); - } -} - static void raid5_submit_rw_request(struct raid_bdev_io *raid_io) { From 2064f11c2d13b4775914d220b567814c49d6b194 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 
03:14:07 +0000 Subject: [PATCH 58/71] feat: Call the write-request submit function from the rw-request submit function. --- module/bdev/raid/raid5.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 255dccfab82..78bd1ddea98 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2690,8 +2690,7 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) raid5_submit_read_request(request); break; case SPDK_BDEV_IO_TYPE_WRITE: - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; - raid5_stripe_req_complete(request); + raid5_submit_write_request(request); break; default: SPDK_ERRLOG("RAID5: Invalid request type"); From bffc7fc56fa44be8418ba8dea11b5f545797a19d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Fri, 15 Dec 2023 03:22:55 +0000 Subject: [PATCH 59/71] fix: Fix calculation of the start strip to submit in the reading part of the default write request. --- module/bdev/raid/raid5.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 78bd1ddea98..c481a9d457b 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1763,6 +1763,10 @@ raid5_write_default_reading(struct raid5_stripe_request *request) num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); } + if (start_idx == ps_idx) { + start_idx = raid5_next_idx(start_idx, raid_bdev); + } + for (uint8_t idx = start_idx; idx != sts_idx; idx = raid5_next_idx(idx, raid_bdev)) { if (idx == ps_idx) { continue; From a88afe91aa1046014e1b5d16574f695de54a6703 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 01:52:49 +0000 Subject: [PATCH 60/71] fix: Fix the function that XORs iovs with iovs.
--- module/bdev/raid/raid5.c | 61 +++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index c481a9d457b..793ad3fdf14 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -461,8 +461,6 @@ raid5_xor_iovs_with_iovs(struct iovec *xor_iovs, int xor_iovcnt, uint64_t xor_of uint64_t xor_idx = 0; uint64_t idx = 0; - SPDK_ERRLOG("raid5_xor_iovs_with_iovs\n"); - while (xofs8 >= xor_iovs[xor_idx].iov_len / 8) { xofs8 -= xor_iovs[xor_idx].iov_len / 8; ++xor_idx; @@ -480,31 +478,54 @@ raid5_xor_iovs_with_iovs(struct iovec *xor_iovs, int xor_iovcnt, uint64_t xor_of b8 = &b8[ofs8]; if (xor_iovs[xor_idx].iov_len / 8 - xofs8 > iovs[idx].iov_len / 8 - ofs8) { - for (uint64_t i = ofs8; i < (iovs[idx].iov_len / 8); ++i) { - xb8[i - ofs8 + xofs8] ^= b8[i]; + if (num_b8 + ofs8 < iovs[idx].iov_len / 8) { + for (uint64_t i = 0; i < num_b8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 = 0; + } else { + for (uint64_t i = 0; i < (iovs[idx].iov_len / 8) - ofs8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 -= iovs[idx].iov_len / 8 - ofs8; + xofs8 += iovs[idx].iov_len / 8 - ofs8; + ++idx; + ofs8 = 0; } - num_b8 -= iovs[idx].iov_len / 8 - ofs8; - ++idx; - ofs8 = 0; } else if (xor_iovs[xor_idx].iov_len / 8 - xofs8 < iovs[idx].iov_len / 8 - ofs8) { - for (uint64_t i = xofs8; i < (xor_iovs[xor_idx].iov_len / 8); ++i) { - xb8[i] ^= b8[i - xofs8 + ofs8]; + if (num_b8 + xofs8 < xor_iovs[xor_idx].iov_len / 8) { + for (uint64_t i = 0; i < num_b8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 = 0; + } else { + for (uint64_t i = 0; i < (xor_iovs[xor_idx].iov_len / 8) - xofs8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 -= xor_iovs[xor_idx].iov_len / 8 - xofs8; + ofs8 += xor_iovs[xor_idx].iov_len / 8 - xofs8; + ++xor_idx; + xofs8 = 0; } - num_b8 -= xor_iovs[xor_idx].iov_len / 8 - xofs8; - ++xor_idx; - xofs8 = 0; } else { - for (uint64_t i = ofs8; i < (iovs[idx].iov_len / 8); ++i) { - xb8[i - ofs8 + xofs8] ^= 
b8[i]; + if (num_b8 + ofs8 < iovs[idx].iov_len / 8) { + for (uint64_t i = 0; i < num_b8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 = 0; + } else { + for (uint64_t i = 0; i < (iovs[idx].iov_len / 8)- ofs8; ++i) { + xb8[i] ^= b8[i]; + } + num_b8 -= iovs[idx].iov_len / 8 - ofs8; + ++idx; + ofs8 = 0; + ++xor_idx; + xofs8 = 0; } - num_b8 -= iovs[idx].iov_len / 8 - ofs8; - ++idx; - ofs8 = 0; - ++xor_idx; - xofs8 = 0; } - } + } } static struct raid5_stripe_request * From cfee2dc4482572e97ef33c0143d46dcd6a1de72a Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 01:57:26 +0000 Subject: [PATCH 61/71] feat: Add fn to set/reset/free strips buffs for write request with broken req strip. --- module/bdev/raid/raid5.c | 88 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 793ad3fdf14..31678e238a5 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -1153,6 +1153,94 @@ raid5_w_r_modify_w_writing_free_strip_buffs(struct raid5_stripe_request *request raid5_free_all_req_strips_iovs(request); } +static int +raid5_write_broken_req_set_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + + if (sts_idx == es_idx) { + return raid5_allocate_strips_buffs_until(request, after_sts_idx, sts_idx, + raid5_num_blcks(bdev_io, raid_bdev, sts_idx)); + } else if (request->broken_strip_idx != sts_idx && request->broken_strip_idx != es_idx) { + return raid5_allocate_strips_buffs_until(request, es_idx, after_sts_idx, raid_bdev->strip_size); + } else if (request->broken_strip_idx == sts_idx) { + 
return raid5_allocate_strips_buffs_until(request, after_sts_idx, sts_idx, raid_bdev->strip_size); + } else { + return raid5_allocate_strips_buffs_until(request, after_es_idx, es_idx, raid_bdev->strip_size); + } +} + +static void +raid5_w_br_r_reading_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + + if (sts_idx == es_idx) { + raid5_free_strips_buffs_until(request, after_sts_idx, sts_idx); + } else if (request->broken_strip_idx != sts_idx && request->broken_strip_idx != es_idx) { + raid5_free_strips_buffs_until(request, es_idx, after_sts_idx); + } else if (request->broken_strip_idx == sts_idx) { + raid5_free_strips_buffs_until(request, after_sts_idx, sts_idx); + } else { + raid5_free_strips_buffs_until(request, after_es_idx, es_idx); + } +} + +static int +raid5_write_broken_req_reset_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + int ret = 0; + + if (sts_idx == es_idx) { + raid5_free_strips_buffs_until(request, after_sts_idx, ps_idx); + raid5_free_strips_buffs_until(request, after_ps_idx, sts_idx); + } else if (request->broken_strip_idx != 
sts_idx && request->broken_strip_idx != es_idx) { + raid5_free_strips_buffs_until(request, es_idx, ps_idx); + raid5_free_strips_buffs_until(request, after_ps_idx, after_sts_idx); + } else if (request->broken_strip_idx == sts_idx) { + raid5_free_strips_buffs_until(request, after_sts_idx, ps_idx); + raid5_free_strips_buffs_until(request, after_ps_idx, sts_idx); + } else { + raid5_free_strips_buffs_until(request, after_es_idx, ps_idx); + raid5_free_strips_buffs_until(request, after_ps_idx, es_idx); + } + ret = raid5_set_all_req_strips_iovs(request); + if (ret != 0) { + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + } + return ret; +} + +static void +raid5_w_br_r_writing_free_strip_buffs(struct raid5_stripe_request *request) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(request->raid_io); + struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + + raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); + raid5_free_all_req_strips_iovs(request); +} + static void raid5_stripe_req_complete(struct raid5_stripe_request *request) { From 658d6920d11ce879552833b6ee1d08559cc0a910 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:00:20 +0000 Subject: [PATCH 62/71] feat: Add cb and part completion fns for write with broken req strip writing part. 
--- module/bdev/raid/raid5.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 31678e238a5..94fbc8c35b5 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2329,6 +2329,40 @@ raid5_write_r_modify_w_reading(struct raid5_stripe_request *request) } } +static bool +raid5_w_br_r_writing_complete_part(struct raid5_stripe_request *request, uint64_t completed, + enum spdk_bdev_io_status status) +{ + struct raid_bdev_io *raid_io = request->raid_io; + + assert(raid_io->base_bdev_io_remaining >= completed); + raid_io->base_bdev_io_remaining -= completed; + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid_io->base_bdev_io_status = status; + } + + if (raid_io->base_bdev_io_remaining == 0) { + raid5_w_br_r_writing_free_strip_buffs(request); + raid5_stripe_req_complete(request); + + return true; + } else { + return false; + } +} + +static void +raid5_w_br_r_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + struct raid5_stripe_request *request = cb_arg; + + spdk_bdev_free_io(bdev_io); + + raid5_w_br_r_writing_complete_part(request, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + static void raid5_submit_write_request(struct raid5_stripe_request *request) { From 382508e87d328ee4abbb39c93c2019b2d990b9f2 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:01:58 +0000 Subject: [PATCH 63/71] feat: Add fn for write req/broken req strip case/writing part. 
--- module/bdev/raid/raid5.c | 99 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 94fbc8c35b5..24117ccdc6c 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2363,6 +2363,105 @@ raid5_w_br_r_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg SPDK_BDEV_IO_STATUS_FAILED); } +static void raid5_write_broken_req_writing(struct raid5_stripe_request *request); + +static void +_raid5_write_broken_req_writing(void *cb_arg) +{ + struct raid5_stripe_request *request = cb_arg; + raid5_write_broken_req_writing(request); +} + +static void +raid5_write_broken_req_writing(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t base_bdev_io_not_submitted; + uint64_t ofs_blcks; + uint64_t num_blcks; + uint8_t start_idx; + int ret = 0; + + if (raid_io->base_bdev_io_submitted == 0) { + base_info = &raid_bdev->base_bdev_info[ps_idx]; + base_ch = raid_ch->base_channel[ps_idx]; + + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + if (es_idx == sts_idx) { + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); + } else { + num_blcks = raid_bdev->strip_size; + } + + ret = spdk_bdev_writev_blocks(base_info->desc, base_ch, + request->strip_buffs[ps_idx], request->strip_buffs_cnts[ps_idx], + ofs_blcks, num_blcks, + raid5_w_br_r_writing_cb, + request); + + if (spdk_unlikely(ret 
!= 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_writing, request); + return; + } + + base_bdev_io_not_submitted = ((es_idx + raid_bdev->num_base_bdevs) - + sts_idx) % raid_bdev->num_base_bdevs + 1 - raid_io->base_bdev_io_submitted; + raid5_w_br_r_writing_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + ++raid_io->base_bdev_io_submitted; + } + start_idx = (sts_idx + raid_io->base_bdev_io_submitted - 1) % raid_bdev->num_base_bdevs; + if (start_idx == request->broken_strip_idx) { + start_idx = raid5_next_idx(start_idx, raid_bdev); + } + + for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) { + if (idx == request->broken_strip_idx) { + continue; + } + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, idx); + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx); + + ret = spdk_bdev_writev_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_w_br_r_writing_cb, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_writing, request); + return; + } + + base_bdev_io_not_submitted = ((es_idx + raid_bdev->num_base_bdevs) - + sts_idx) % raid_bdev->num_base_bdevs + 1 - raid_io->base_bdev_io_submitted; + raid5_w_br_r_writing_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } +} + static void raid5_submit_write_request(struct raid5_stripe_request *request) { From b704607e362422d71d787a42fcc1c8a4b5b53fe4 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:03:10 +0000 Subject: [PATCH 64/71] 
feat: Add cb and part completion fns for write with broken req strip reading part. --- module/bdev/raid/raid5.c | 234 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 24117ccdc6c..ef37918080c 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2462,6 +2462,240 @@ raid5_write_broken_req_writing(struct raid5_stripe_request *request) } } +static bool +raid5_w_br_r_reading_complete_part(struct raid5_stripe_request *request, uint64_t completed, + enum spdk_bdev_io_status status) +{ + struct raid_bdev_io *raid_io = request->raid_io; + + assert(raid_io->base_bdev_io_remaining >= completed); + raid_io->base_bdev_io_remaining -= completed; + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid_io->base_bdev_io_status = status; + } + + if (raid_io->base_bdev_io_remaining == 0) { + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + uint64_t block_size_b8 = ((uint64_t)128 * raid_bdev->strip_size_kb) / + raid_bdev->strip_size; + uint8_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint8_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint8_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint8_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint8_t after_ps_idx = raid5_next_idx(ps_idx, raid_bdev); + uint64_t ofs_b8; + uint64_t num_b8; + int ret = 0; + + if (raid_io->base_bdev_io_status != SPDK_BDEV_IO_STATUS_SUCCESS) { + raid5_w_br_r_reading_free_strip_buffs(request); + raid5_stripe_req_complete(request); + return true; + } + + if (sts_idx == es_idx) { + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, sts_idx) * block_size_b8; + for (uint8_t i = after_sts_idx; i != ps_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + 
request->strip_buffs_cnts[ps_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + num_b8); + } + for (uint8_t i = after_ps_idx; i != sts_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + num_b8); + } + } else if (request->broken_strip_idx != sts_idx && request->broken_strip_idx != es_idx) { + num_b8 = raid_bdev->strip_size * block_size_b8; + for (uint8_t i = after_es_idx; i != ps_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + num_b8); + } + for (uint8_t i = after_ps_idx; i != sts_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], 0, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + num_b8); + } + num_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], 0, + request->strip_buffs[sts_idx], request->strip_buffs_cnts[sts_idx], 0, + num_b8); + num_b8 = (raid_bdev->strip_size - raid5_num_blcks(bdev_io, raid_bdev, es_idx)) * + block_size_b8; + ofs_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[es_idx], request->strip_buffs_cnts[es_idx], ofs_b8, + num_b8); + } else if (request->broken_strip_idx == sts_idx) { + ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, sts_idx) * block_size_b8; + for (uint8_t i = ps_idx; i != sts_idx; i = raid5_next_idx(i, 
raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + for (uint8_t i = after_es_idx; i != ps_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + ofs_b8 = 0; + num_b8 = (raid_bdev->strip_size - + raid5_num_blcks(bdev_io, raid_bdev, sts_idx)) * block_size_b8; + for (uint8_t i = after_sts_idx; i != es_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + if (raid5_num_blcks(bdev_io, raid_bdev, es_idx) < raid_bdev->strip_size - + raid5_num_blcks(bdev_io, raid_bdev, sts_idx)) { + ofs_b8 = 0; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[es_idx], request->strip_buffs_cnts[es_idx], ofs_b8, + num_b8); + + ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, sts_idx) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[es_idx], request->strip_buffs_cnts[es_idx], ofs_b8, + num_b8); + } else { + ofs_b8 = 0; + num_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[es_idx], request->strip_buffs_cnts[es_idx], ofs_b8, + num_b8); + + ofs_b8 = raid5_num_blcks(bdev_io, 
raid_bdev, es_idx) * block_size_b8; + num_b8 = (raid_bdev->strip_size - raid5_num_blcks(bdev_io, raid_bdev, es_idx)) * + block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[es_idx], request->strip_buffs_cnts[es_idx], ofs_b8, + num_b8); + } + } else { + ofs_b8 = 0; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + for (uint8_t i = ps_idx; i != sts_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + for (uint8_t i = after_es_idx; i != ps_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + ofs_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + num_b8 = (raid_bdev->strip_size - + raid5_num_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + for (uint8_t i = after_sts_idx; i != es_idx; i = raid5_next_idx(i, raid_bdev)) { + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], ofs_b8, + num_b8); + } + + if (raid5_num_blcks(bdev_io, raid_bdev, sts_idx) < raid_bdev->strip_size - + raid5_num_blcks(bdev_io, raid_bdev, es_idx)) { + ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, sts_idx) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[sts_idx], request->strip_buffs_cnts[sts_idx], ofs_b8, + num_b8); + + ofs_b8 = 0; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + 
raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[sts_idx], request->strip_buffs_cnts[sts_idx], ofs_b8, + num_b8); + } else { + ofs_b8 = raid5_num_blcks(bdev_io, raid_bdev, es_idx) * block_size_b8; + num_b8 = (raid_bdev->strip_size - raid5_num_blcks(bdev_io, raid_bdev, es_idx)) * + block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[sts_idx], request->strip_buffs_cnts[sts_idx], ofs_b8, + num_b8); + + ofs_b8 = 0; + num_b8 = (raid_bdev->strip_size - raid5_num_blcks(bdev_io, raid_bdev, sts_idx)) + * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[sts_idx], request->strip_buffs_cnts[sts_idx], ofs_b8, + num_b8); + } + } + + ret = raid5_write_broken_req_reset_strip_buffs(request); + if (spdk_unlikely(ret != 0)) { + SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + return true; + } + + for (uint8_t i = sts_idx; i != after_es_idx; i = raid5_next_idx(i, raid_bdev)) { + ofs_b8 = (raid5_ofs_blcks(bdev_io, raid_bdev, i) - + raid5_ofs_blcks(bdev_io, raid_bdev, es_idx)) * block_size_b8; + num_b8 = raid5_num_blcks(bdev_io, raid_bdev, i) * block_size_b8; + raid5_xor_iovs_with_iovs(request->strip_buffs[ps_idx], + request->strip_buffs_cnts[ps_idx], ofs_b8, + request->strip_buffs[i], request->strip_buffs_cnts[i], 0, + num_b8); + } + + raid_io->base_bdev_io_submitted = 0; + raid_io->base_bdev_io_remaining = (((es_idx + raid_bdev->num_base_bdevs) - + sts_idx) % raid_bdev->num_base_bdevs) + 1; + + raid5_write_broken_req_writing(request); + + return true; + } else { + return false; + } +} + +static void +raid5_w_br_r_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + struct raid5_stripe_request 
*request = cb_arg; + + spdk_bdev_free_io(bdev_io); + + raid5_w_br_r_reading_complete_part(request, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + static void raid5_submit_write_request(struct raid5_stripe_request *request) { From 4ac4ea7c6546240d2dabb3ceaf2937af901abf0a Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:04:53 +0000 Subject: [PATCH 65/71] feat: Add fn for write req/broken req strip case/reading part. --- module/bdev/raid/raid5.c | 170 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index ef37918080c..8d8fad755ec 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2696,6 +2696,176 @@ raid5_w_br_r_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg SPDK_BDEV_IO_STATUS_FAILED); } +static void raid5_write_broken_req_reading(struct raid5_stripe_request *request); + +static void +_raid5_write_broken_req_reading(void *cb_arg) +{ + struct raid5_stripe_request *request = cb_arg; + raid5_write_broken_req_reading(request); +} + +static void +raid5_write_broken_req_reading(struct raid5_stripe_request *request) +{ + struct raid_bdev_io *raid_io = request->raid_io; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); + uint64_t after_sts_idx = raid5_next_idx(sts_idx, raid_bdev); + uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); + uint64_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); + uint64_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); + uint64_t num_strips_to_r; + uint64_t base_bdev_io_not_submitted; + uint64_t ofs_blcks; + uint64_t num_blcks; + uint8_t start_idx; + 
int ret = 0; + + if (sts_idx == es_idx) { + num_strips_to_r = raid_bdev->num_base_bdevs - 2; + start_idx = (after_sts_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; + if (start_idx == ps_idx) { + start_idx = raid5_next_idx(start_idx, raid_bdev); + } + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx); + num_blcks = raid5_num_blcks(bdev_io, raid_bdev, sts_idx); + + for (uint8_t idx = start_idx; idx != sts_idx; idx = raid5_next_idx(idx, raid_bdev)) { + if (idx == ps_idx) { + continue; + } + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_w_br_r_reading_cb, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_reading, request); + return; + } + + base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted; + raid5_w_br_r_reading_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } + } else if (request->broken_strip_idx != sts_idx && request->broken_strip_idx != es_idx) { + num_strips_to_r = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) - + sts_idx) % raid_bdev->num_base_bdevs); + start_idx = (es_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; + if (start_idx == ps_idx) { + start_idx = raid5_next_idx(start_idx, raid_bdev); + } + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + num_blcks = raid_bdev->strip_size; + + for (uint8_t idx = start_idx; idx != after_sts_idx; idx = raid5_next_idx(idx, raid_bdev)) { + if (idx == ps_idx) { + continue; + } + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ret = 
spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_w_br_r_reading_cb, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_reading, request); + return; + } + + base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted; + raid5_w_br_r_reading_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } + } else if (request->broken_strip_idx == sts_idx) { + num_strips_to_r = raid_bdev->num_base_bdevs - 1; + start_idx = (after_sts_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + num_blcks = raid_bdev->strip_size; + + for (uint8_t idx = start_idx; idx != sts_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_w_br_r_reading_cb, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_reading, request); + return; + } + + base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted; + raid5_w_br_r_reading_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } + } else { + num_strips_to_r = raid_bdev->num_base_bdevs - 1; + start_idx = (after_es_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; + ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); + num_blcks = raid_bdev->strip_size; + 
+ for (uint8_t idx = start_idx; idx != es_idx; idx = raid5_next_idx(idx, raid_bdev)) { + base_info = &raid_bdev->base_bdev_info[idx]; + base_ch = raid_ch->base_channel[idx]; + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + request->strip_buffs[idx], request->strip_buffs_cnts[idx], + ofs_blcks, num_blcks, + raid5_w_br_r_reading_cb, + request); + + if (spdk_unlikely(ret != 0)) { + if (spdk_unlikely(ret == -ENOMEM)) { + raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), + base_ch, _raid5_write_broken_req_reading, request); + return; + } + + base_bdev_io_not_submitted = num_strips_to_r - raid_io->base_bdev_io_submitted; + raid5_w_br_r_reading_complete_part(request, base_bdev_io_not_submitted, + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ++raid_io->base_bdev_io_submitted; + } + } +} + static void raid5_submit_write_request(struct raid5_stripe_request *request) { From 5cde8880ec6925559b2b36e2ba5221df9791caf2 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:08:15 +0000 Subject: [PATCH 66/71] feat: Add submitting write request with broken parity strip. 
--- module/bdev/raid/raid5.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 8d8fad755ec..f6186934e17 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2949,6 +2949,26 @@ raid5_submit_write_request(struct raid5_stripe_request *request) raid5_write_r_modify_w_reading(request); } else { // broken req strip + + ret = raid5_write_broken_req_set_strip_buffs(request); + if (spdk_unlikely(ret != 0)) { + SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n"); + raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + raid5_stripe_req_complete(request); + return; + } + + raid_io->base_bdev_io_submitted = 0; + if (sts_idx == es_idx) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 2; + } else if (request->broken_strip_idx != sts_idx && request->broken_strip_idx != es_idx) { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) - + sts_idx) % raid_bdev->num_base_bdevs); + } else { + raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 1; + } + + raid5_write_broken_req_reading(request); } } From 7715abd851ed9ca14bd481bb88d0cdbbaf025357 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:14:41 +0000 Subject: [PATCH 67/71] refactor: Remove unused structs and functions. 
--- module/bdev/raid/raid5.c | 564 --------------------------------------- 1 file changed, 564 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index f6186934e17..58e9a10ee3e 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -37,18 +37,6 @@ struct raid5_stripe_request { int broken_strip_idx; }; -struct raid5_io_buffer { - struct raid_bdev_io *raid_io; - - struct iovec *buffer; -}; - -struct raid5_write_request_buffer { - struct raid5_io_buffer *wr_xor_buff; - - struct iovec *buffer; -}; - static inline uint8_t raid5_parity_strip_index(struct raid_bdev *raid_bdev, uint64_t stripe_index) { @@ -82,106 +70,6 @@ raid5_free_buffer(struct iovec *buffer) free(buffer); } -static inline struct raid5_io_buffer * -raid5_get_io_buffer(struct raid_bdev_io *raid_io, size_t data_len) -{ - struct raid5_io_buffer *io_buffer; - - io_buffer = calloc(1, sizeof(struct raid5_io_buffer)); - if (io_buffer == NULL) { - return NULL; - } - - io_buffer->buffer = raid5_get_buffer(data_len); - if (io_buffer->buffer == NULL) { - free(io_buffer); - return NULL; - } - - io_buffer->raid_io = raid_io; - return io_buffer; -} - -static inline void -raid5_free_io_buffer(struct raid5_io_buffer *io_buffer) -{ - raid5_free_buffer(io_buffer->buffer); - free(io_buffer); -} - -static inline struct raid5_write_request_buffer * -raid5_get_write_request_buffer(struct raid5_io_buffer *wr_xor_buff, size_t data_len) -{ - struct raid5_write_request_buffer *wr_buffer; - - wr_buffer = calloc(1, sizeof(struct raid5_write_request_buffer)); - if (wr_buffer == NULL) { - return NULL; - } - - wr_buffer->buffer = raid5_get_buffer(data_len); - if (wr_buffer->buffer == NULL) { - free(wr_buffer); - return NULL; - } - - wr_buffer->wr_xor_buff = wr_xor_buff; - return wr_buffer; -} - -static inline void -raid5_free_write_request_buffer(struct raid5_write_request_buffer *wr_buffer) -{ - raid5_free_buffer(wr_buffer->buffer); - free(wr_buffer); -} - -static inline void 
-raid5_xor_buffers(struct iovec *xor_res, struct iovec *buffer) -{ - uint64_t *xb8 = xor_res->iov_base; - uint64_t *b8 = buffer->iov_base; - size_t len8 = xor_res->iov_len / 8; - - for (size_t i=0; i < len8; ++i) { - xb8[i] ^= b8[i]; - } -} - -static inline void -raid5_xor_iovs_with_buffer(struct iovec *iovs, int iovcnt, struct iovec *buffer) -{ - uint64_t *xb8; - uint64_t *b8 = buffer->iov_base; - size_t b8i = 0; - size_t len8; - - for (int iovidx = 0; iovidx < iovcnt; ++iovidx) { - xb8 = iovs[iovidx].iov_base; - len8 = iovs[iovidx].iov_len / 8; - for (size_t i = 0; i < len8; ++i, ++b8i) { - xb8[i] ^= b8[b8i]; - } - } -} - -static inline void -raid5_xor_buffer_with_iovs(struct iovec *buffer, struct iovec *iovs, int iovcnt) -{ - uint64_t *xb8 = buffer->iov_base; - uint64_t *b8; - size_t xb8i = 0; - size_t len8; - - for (int iovidx = 0; iovidx < iovcnt; ++iovidx) { - b8 = iovs[iovidx].iov_base; - len8 = iovs[iovidx].iov_len / 8; - for (size_t i = 0; i < len8; ++i, ++xb8i) { - xb8[xb8i] ^= b8[i]; - } - } -} - static inline void raid5_fill_iovs_with_zeroes(struct iovec *iovs, int iovcnt) { @@ -207,135 +95,6 @@ raid5_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); } -static void -raid5_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct raid_bdev_io *raid_io = cb_arg; - - spdk_bdev_free_io(bdev_io); - - raid_bdev_io_complete(raid_io, success ? 
- SPDK_BDEV_IO_STATUS_SUCCESS : - SPDK_BDEV_IO_STATUS_FAILED); -} - -static void -raid5_read_request_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct raid5_io_buffer *io_buffer = cb_arg; - struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(io_buffer->raid_io); - - spdk_bdev_free_io(bdev_io); - - assert(io_buffer->raid_io->base_bdev_io_remaining > 0); - io_buffer->raid_io->base_bdev_io_remaining--; - - if (!success) { - io_buffer->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - } else { - raid5_xor_iovs_with_buffer(rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt, - io_buffer->buffer); - } - - if (io_buffer->raid_io->base_bdev_io_remaining == 0) { - raid_bdev_io_complete(io_buffer->raid_io, - io_buffer->raid_io->base_bdev_io_status); - } - - raid5_free_io_buffer(io_buffer); -} - -static void raid5_submit_write_request_writing(struct raid5_io_buffer *io_buffer); - -static void -raid5_write_request_reading_complete_part (struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct raid5_write_request_buffer *wr_buffer = cb_arg; - struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(wr_buffer->wr_xor_buff->raid_io); - - spdk_bdev_free_io(bdev_io); - - assert(wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining > 0); - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining--; - - if (!success) { - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - } else { - raid5_xor_buffers(wr_buffer->wr_xor_buff->buffer, wr_buffer->buffer); - } - - if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining == 0) { - if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { - raid5_xor_buffer_with_iovs(wr_buffer->wr_xor_buff->buffer, - rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt); - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_submitted = 1; - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining = 1; - 
raid5_submit_write_request_writing(wr_buffer->wr_xor_buff); - } else { - raid_bdev_io_complete(wr_buffer->wr_xor_buff->raid_io, - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status); - raid5_free_io_buffer(wr_buffer->wr_xor_buff); - } - } - - raid5_free_write_request_buffer(wr_buffer); -} - -static void -raid5_write_request_reading_with_writing_req_strip_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct raid5_write_request_buffer *wr_buffer = cb_arg; - struct spdk_bdev_io *rbdev_io = spdk_bdev_io_from_ctx(wr_buffer->wr_xor_buff->raid_io); - - spdk_bdev_free_io(bdev_io); - - assert(wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining > 0); - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining--; - - if (!success) { - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - } else { - raid5_xor_buffers(wr_buffer->wr_xor_buff->buffer, wr_buffer->buffer); - } - - if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining == 0) { - if (wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status == SPDK_BDEV_IO_STATUS_SUCCESS) { - raid5_xor_buffer_with_iovs(wr_buffer->wr_xor_buff->buffer, - rbdev_io->u.bdev.iovs, rbdev_io->u.bdev.iovcnt); - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_submitted = 0; - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_remaining = 2; - raid5_submit_write_request_writing(wr_buffer->wr_xor_buff); - } else { - raid_bdev_io_complete(wr_buffer->wr_xor_buff->raid_io, - wr_buffer->wr_xor_buff->raid_io->base_bdev_io_status); - raid5_free_io_buffer(wr_buffer->wr_xor_buff); - } - } - - raid5_free_write_request_buffer(wr_buffer); -} - -static void -raid5_write_request_writing_complete_part(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct raid5_io_buffer *io_buffer = cb_arg; - - spdk_bdev_free_io(bdev_io); - - assert(io_buffer->raid_io->base_bdev_io_remaining > 0); - io_buffer->raid_io->base_bdev_io_remaining--; - - if (!success) { - 
io_buffer->raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - } - - if (io_buffer->raid_io->base_bdev_io_remaining == 0) { - raid_bdev_io_complete(io_buffer->raid_io, - io_buffer->raid_io->base_bdev_io_status); - raid5_free_io_buffer(io_buffer); - } -} - static bool raid5_check_io_boundaries(struct raid_bdev_io *raid_io) { @@ -2877,7 +2636,6 @@ raid5_submit_write_request(struct raid5_stripe_request *request) struct spdk_io_channel *base_ch; uint64_t sts_idx = raid5_start_strip_idx(bdev_io, raid_bdev); uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); - uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); uint8_t ps_idx = raid5_parity_strip_index(raid_bdev, raid5_stripe_idx(bdev_io, raid_bdev)); int ret = 0; @@ -2972,328 +2730,6 @@ raid5_submit_write_request(struct raid5_stripe_request *request) } } -static void raid5_submit_rw_request(struct raid_bdev_io *raid_io); - -static void -_raid5_submit_rw_request(void *_raid_io) -{ - struct raid_bdev_io *raid_io = _raid_io; - - raid5_submit_rw_request(raid_io); -} - -static void raid5_submit_write_request_reading(struct raid5_io_buffer *wr_xor_buff); - -static void -_raid5_submit_write_request_reading(void *_wr_xor_buff) -{ - struct raid5_io_buffer *wr_xor_buff = _wr_xor_buff; - - raid5_submit_write_request_reading(wr_xor_buff); -} - -static void -raid5_submit_write_request_reading(struct raid5_io_buffer *wr_xor_buff) -{ - struct raid_bdev_io *raid_io = wr_xor_buff->raid_io; - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - struct spdk_bdev_ext_io_opts io_opts = {}; - struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; - struct raid_bdev *raid_bdev = raid_io->raid_bdev; - uint64_t block_size_b = (raid_bdev->strip_size_kb / raid_bdev->strip_size) * (uint64_t)1024; - uint8_t broken_bdev_idx = raid_bdev->num_base_bdevs; - uint64_t stripe_index; - uint64_t parity_strip_idx; - uint64_t req_bdev_idx; - uint32_t offset_in_strip; - uint64_t offset_blocks; - uint64_t num_blocks; - int 
ret = 0; - uint64_t start_strip_idx; - struct raid_base_bdev_info *base_info; - struct spdk_io_channel *base_ch; - struct raid5_write_request_buffer *wr_buffer; - - start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - - io_opts.size = sizeof(io_opts); - io_opts.memory_domain = bdev_io->u.bdev.memory_domain; - io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; - io_opts.metadata = bdev_io->u.bdev.md_buf; - - stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); - parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); - offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - - req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); - if (req_bdev_idx >= parity_strip_idx) { - ++req_bdev_idx; - } - offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; - num_blocks = bdev_io->u.bdev.num_blocks; - - // calculating of broken strip idx - for (uint8_t idx = 0; idx < raid_bdev->num_base_bdevs; ++idx) { - if (raid_ch->base_channel[idx] == NULL) { - if (broken_bdev_idx == raid_bdev->num_base_bdevs) { - broken_bdev_idx = idx; - } else { - SPDK_ERRLOG("2 broken strips\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - if (raid_io->base_bdev_io_submitted == 0) { - raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - return; - } - } - } - - if (broken_bdev_idx != req_bdev_idx && broken_bdev_idx != raid_bdev->num_base_bdevs) { - // case: broken strip isn't request strip or parity strip - - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = 2; - } - - switch (raid_io->base_bdev_io_submitted) { - case 0: - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - - wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * block_size_b); - if (wr_buffer == NULL) { - raid5_queue_io_wait(raid_io, 
spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - return; - } - - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - wr_buffer->buffer, 1, - offset_blocks, num_blocks, raid5_write_request_reading_with_writing_req_strip_complete_part, - wr_buffer, &io_opts); - - if (ret != 0) { - raid5_free_write_request_buffer(wr_buffer); - if (ret == -ENOMEM) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid5_free_io_buffer(wr_xor_buff); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - } - return; - } - raid_io->base_bdev_io_submitted++; - case 1: - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - - wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * block_size_b); - if (wr_buffer == NULL) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - return; - } - - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - wr_buffer->buffer, 1, - offset_blocks, num_blocks, raid5_write_request_reading_with_writing_req_strip_complete_part, - wr_buffer, &io_opts); - - if (ret != 0) { - raid5_free_write_request_buffer(wr_buffer); - if (ret == -ENOMEM) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - 2; - if (raid_io->base_bdev_io_remaining == 0) { - raid5_free_io_buffer(wr_xor_buff); - 
raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - } - return; - } - raid_io->base_bdev_io_submitted++; - } - } else { - // cases with reading stripe - - uint8_t start_idx; - spdk_bdev_io_completion_cb cb; - - if (broken_bdev_idx == req_bdev_idx) { - // case: broken request strip - cb = raid5_write_request_reading_complete_part; - } else { - // case: without broken strip - cb = raid5_write_request_reading_with_writing_req_strip_complete_part; - } - - if (raid_io->base_bdev_io_submitted == 0) { - raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs - 2; - } - - start_idx = raid_io->base_bdev_io_submitted; - if (req_bdev_idx <= start_idx || parity_strip_idx <= start_idx) { - start_idx++; - if (req_bdev_idx <= start_idx && parity_strip_idx <= start_idx) { - start_idx++; - } - } - - for (uint8_t idx = start_idx; idx < raid_bdev->num_base_bdevs; ++idx) { - if (idx == req_bdev_idx || idx == parity_strip_idx) { - continue; - } - - base_info = &raid_bdev->base_bdev_info[idx]; - base_ch = raid_ch->base_channel[idx]; - - wr_buffer = raid5_get_write_request_buffer(wr_xor_buff, num_blocks * block_size_b); - if (wr_buffer == NULL) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - return; - } - - ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, - wr_buffer->buffer, 1, - offset_blocks, num_blocks, cb, - wr_buffer, &io_opts); - - if (ret != 0) { - raid5_free_write_request_buffer(wr_buffer); - if (ret == -ENOMEM) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_reading, wr_xor_buff); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - - (raid_bdev->num_base_bdevs - 2); - if 
(raid_io->base_bdev_io_remaining == 0) { - raid5_free_io_buffer(wr_xor_buff); - raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - } - return; - } - raid_io->base_bdev_io_submitted++; - } - } -} - -static void -_raid5_submit_write_request_writing(void *_io_buffer) -{ - struct raid5_io_buffer *io_buffer = _io_buffer; - - raid5_submit_write_request_writing(io_buffer); -} - -static void -raid5_submit_write_request_writing(struct raid5_io_buffer *io_buffer) -{ - struct raid_bdev_io *raid_io = io_buffer->raid_io; - struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); - struct spdk_bdev_ext_io_opts io_opts = {}; - struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; - struct raid_bdev *raid_bdev = raid_io->raid_bdev; - uint64_t stripe_index; - uint64_t parity_strip_idx; - uint64_t req_bdev_idx; - uint32_t offset_in_strip; - uint64_t offset_blocks; - uint64_t num_blocks; - int ret = 0; - uint64_t start_strip_idx; - struct raid_base_bdev_info *base_info; - struct spdk_io_channel *base_ch; - - start_strip_idx = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - - io_opts.size = sizeof(io_opts); - io_opts.memory_domain = bdev_io->u.bdev.memory_domain; - io_opts.memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; - io_opts.metadata = bdev_io->u.bdev.md_buf; - - stripe_index = start_strip_idx / (raid_bdev->num_base_bdevs - 1); - parity_strip_idx = raid5_parity_strip_index(raid_bdev, stripe_index); - offset_in_strip = bdev_io->u.bdev.offset_blocks % (raid_bdev->strip_size); - - req_bdev_idx = start_strip_idx % (raid_bdev->num_base_bdevs - 1); - if (req_bdev_idx >= parity_strip_idx) { - ++req_bdev_idx; - } - offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + offset_in_strip; - num_blocks = bdev_io->u.bdev.num_blocks; - - switch (raid_io->base_bdev_io_submitted) { - case 0: - // writing request strip - - base_info = &raid_bdev->base_bdev_info[req_bdev_idx]; - base_ch = raid_ch->base_channel[req_bdev_idx]; - - ret = 
spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - offset_blocks, num_blocks, raid5_write_request_writing_complete_part, - io_buffer, &io_opts); - - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_writing, io_buffer); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid5_free_io_buffer(io_buffer); - raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); - } - return; - } - - raid_io->base_bdev_io_submitted++; - case 1: - // writing parity strip - - base_info = &raid_bdev->base_bdev_info[parity_strip_idx]; - base_ch = raid_ch->base_channel[parity_strip_idx]; - - ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, - io_buffer->buffer, 1, - offset_blocks, num_blocks, raid5_write_request_writing_complete_part, - io_buffer, &io_opts); - - if (ret != 0) { - if (ret == -ENOMEM) { - raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), - base_ch, _raid5_submit_write_request_writing, io_buffer); - } else { - SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); - assert(false); - raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; - raid_io->base_bdev_io_remaining = raid_io->base_bdev_io_remaining + raid_io->base_bdev_io_submitted - 2; - if (raid_io->base_bdev_io_remaining == 0) { - raid5_free_io_buffer(io_buffer); - raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); - } - } - return; - } - - raid_io->base_bdev_io_submitted++; - } -} - static void raid5_submit_rw_request(struct raid_bdev_io *raid_io) { From 1d542df3c2373675a98fc2a0937e233c934a7529 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 19 Dec 2023 02:15:33 +0000 Subject: [PATCH 68/71] refactor: Remove debug logs. 
--- module/bdev/raid/raid5.c | 147 ++------------------------------------- 1 file changed, 7 insertions(+), 140 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 58e9a10ee3e..03df082dc5f 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -330,8 +330,6 @@ raid5_allocate_strips_buffs_until(struct raid5_stripe_request *request, struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; uint64_t block_size_b = ((uint64_t)1024 * raid_bdev->strip_size_kb) / raid_bdev->strip_size; - SPDK_ERRLOG("raid5_allocate_strips_buffs_until\n"); - for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { request->strip_buffs_cnts[idx] = 1; request->strip_buffs[idx] = raid5_get_buffer(num_blcks * block_size_b); @@ -353,8 +351,6 @@ raid5_free_strips_buffs_until(struct raid5_stripe_request *request, { struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; - SPDK_ERRLOG("raid5_free_strips_buffs_until\n"); - for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { raid5_free_buffer(request->strip_buffs[idx]); request->strip_buffs_cnts[idx] = 0; @@ -375,8 +371,6 @@ raid5_set_req_strips_iovs_until(struct raid5_stripe_request *request, uint64_t *iov_base_b8; int end_iov_idx; - SPDK_ERRLOG("raid5_set_req_strips_iovs_until\n"); - for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { num_blcks = raid5_num_blcks(bdev_io, raid_bdev, idx); end_iov_idx = *iov_idx; @@ -438,8 +432,6 @@ raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, { struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; - SPDK_ERRLOG("raid5_free_req_strips_iovs_until\n"); - for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { free(request->strip_buffs[idx]); request->strip_buffs[idx] = NULL; @@ -457,7 +449,6 @@ raid5_set_all_req_strips_iovs(struct raid5_stripe_request *request) uint64_t remaining_len = 
bdev_io->u.bdev.iovs[0].iov_len; int iov_idx = 0; - SPDK_ERRLOG("raid5_set_all_req_strips_iovs\n"); return raid5_set_req_strips_iovs_until(request, ststrip_idx, after_estrip_idx, &iov_idx, &remaining_len); } @@ -492,8 +483,6 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc int sts_idx_ofs = 0; int es_idx_extra = 0; - SPDK_ERRLOG("raid5_set_all_strip_buffs\n"); - // not req strip and parity strip ret = raid5_allocate_strips_buffs_until(request, after_es_idx, sts_idx, num_blcks); if (ret != 0) { @@ -527,12 +516,6 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc request->strip_buffs[sts_idx][sts_idx_ofs].iov_base = &iov_base_b8[(bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8]; - SPDK_ERRLOG("iov_base_b8: %llu\n", iov_base_b8); - SPDK_ERRLOG("idx: %lu\n", (bdev_io->u.bdev.iovs[iov_idx].iov_len - remaining_len) / 8); - SPDK_ERRLOG("iov_base: %llu\n", request->strip_buffs[sts_idx][sts_idx_ofs].iov_base); - SPDK_ERRLOG("remaining len: %lu\n", remaining_len); - SPDK_ERRLOG("iov_idx: %d", iov_idx); - if (remaining_len >= blocks * block_size_b) { request->strip_buffs[sts_idx][sts_idx_ofs].iov_len = blocks * block_size_b; len -= blocks * block_size_b; @@ -694,8 +677,6 @@ raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_bl uint64_t es_idx = raid5_end_strip_idx(bdev_io, raid_bdev); uint8_t after_es_idx = raid5_next_idx(es_idx, raid_bdev); - SPDK_ERRLOG("raid5_free_all_strip_buffs\n"); - raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); if (ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx)) { @@ -724,16 +705,12 @@ raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_bl static int raid5_read_req_strips_set_strip_buffs(struct raid5_stripe_request *request) { - SPDK_ERRLOG("raid5_read_req_strips_set_strip_buffs\n"); - return raid5_set_all_req_strips_iovs(request); } static void raid5_read_req_strips_free_strip_buffs(struct 
raid5_stripe_request *request) { - SPDK_ERRLOG("raid5_read_req_strips_free_strip_buffs\n"); - raid5_free_all_req_strips_iovs(request); } @@ -745,8 +722,6 @@ raid5_read_exc_req_strip_set_strip_buffs(struct raid5_stripe_request *request) uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); - SPDK_ERRLOG("raid5_read_exc_req_strip_set_strip_buffs\n"); - return raid5_set_all_strip_buffs(request, ofs_blcks, num_blcks); } @@ -758,8 +733,6 @@ raid5_read_exc_req_strip_free_strip_buffs(struct raid5_stripe_request *request) uint64_t ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, request->broken_strip_idx); uint64_t num_blcks = raid5_num_blcks(bdev_io, raid_bdev, request->broken_strip_idx); - SPDK_ERRLOG("raid5_read_exc_req_strip_free_strip_buffs\n"); - raid5_free_all_strip_buffs(request, ofs_blcks, num_blcks); } @@ -773,8 +746,6 @@ raid5_write_default_set_strip_buffs(struct raid5_stripe_request *request) uint64_t es_ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); - SPDK_ERRLOG("raid5_write_default_set_strip_buffs\n"); - if (sts_idx != es_idx) { return raid5_set_all_strip_buffs(request, es_ofs_blcks, raid_bdev->strip_size); } else { @@ -792,8 +763,6 @@ raid5_write_default_free_strip_buffs(struct raid5_stripe_request *request) uint64_t es_ofs_blcks = raid5_ofs_blcks(bdev_io, raid_bdev, es_idx); uint64_t es_num_blcks = raid5_num_blcks(bdev_io, raid_bdev, es_idx); - SPDK_ERRLOG("raid5_write_default_free_strip_buffs\n"); - if (sts_idx != es_idx) { return raid5_free_all_strip_buffs(request, es_ofs_blcks, raid_bdev->strip_size); } else { @@ -804,16 +773,12 @@ raid5_write_default_free_strip_buffs(struct raid5_stripe_request *request) static int raid5_write_broken_parity_set_strip_buffs(struct raid5_stripe_request *request) { - SPDK_ERRLOG("raid5_write_broken_parity_set_strip_buffs\n"); - 
return raid5_set_all_req_strips_iovs(request); } static void raid5_write_broken_parity_free_strip_buffs(struct raid5_stripe_request *request) { - SPDK_ERRLOG("raid5_write_broken_parity_free_strip_buffs\n"); - raid5_free_all_req_strips_iovs(request); } @@ -894,7 +859,6 @@ raid5_write_r_modify_w_reset_strip_buffs(struct raid5_stripe_request *request) raid5_free_strips_buffs_until(request, sts_idx, after_es_idx); ret = raid5_set_all_req_strips_iovs(request); if (ret != 0) { - SPDK_ERRLOG("chop chop\n"); raid5_free_strips_buffs_until(request, ps_idx, after_ps_idx); } return ret; @@ -1016,8 +980,6 @@ raid5_read_complete_part(struct raid5_stripe_request *request, uint64_t complete { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_read_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1089,8 +1051,6 @@ raid5_read_complete_part(struct raid5_stripe_request *request, uint64_t complete static void raid5_read_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_read_cb\n"); - spdk_bdev_free_io(bdev_io); raid5_read_complete_part(request, 1, success ? @@ -1123,8 +1083,6 @@ raid5_read_req_strips(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_read_req_strips\n"); - start_idx = (ststrip_idx + raid_io->base_bdev_io_submitted) > raid_bdev->num_base_bdevs ? ststrip_idx + raid_io->base_bdev_io_submitted - raid_bdev->num_base_bdevs : ststrip_idx + raid_io->base_bdev_io_submitted; @@ -1186,8 +1144,6 @@ raid5_read_except_one_req_strip(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_read_except_one_req_strip\n"); - start_idx = (after_brstrip_idx + raid_io->base_bdev_io_submitted) > raid_bdev->num_base_bdevs ? 
after_brstrip_idx + raid_io->base_bdev_io_submitted - raid_bdev->num_base_bdevs : after_brstrip_idx + raid_io->base_bdev_io_submitted; @@ -1244,8 +1200,6 @@ raid5_submit_read_request(struct raid5_stripe_request *request) uint8_t after_estrip_idx = raid5_next_idx(estrip_idx, raid_bdev); int ret = 0; - SPDK_ERRLOG("raid5_submit_read_request"); - for (uint8_t idx = ststrip_idx; idx != after_estrip_idx; idx = raid5_next_idx(idx, raid_bdev)) { base_ch = raid_ch->base_channel[idx]; @@ -1308,8 +1262,6 @@ raid5_w_default_writing_complete_part(struct raid5_stripe_request *request, uint { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_w_default_writing_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1331,8 +1283,6 @@ static void raid5_w_default_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_w_default_writing_cb\n"); - spdk_bdev_free_io(bdev_io); raid5_w_default_writing_complete_part(request, 1, success ? 
@@ -1386,14 +1336,6 @@ raid5_write_default_writing(struct raid5_stripe_request *request) raid5_w_default_writing_cb, request); - SPDK_ERRLOG("par idx: %d\npar ret: %d\niovs cnt: %d\niovs[0]: %llu\nofs: %llu\nnum_blocks: %llu\n", - ps_idx, - ret, - request->strip_buffs_cnts[ps_idx], - request->strip_buffs[ps_idx][0].iov_base, - ofs_blcks, - num_blcks); - if (spdk_unlikely(ret != 0)) { if (spdk_unlikely(ret == -ENOMEM)) { raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), @@ -1420,14 +1362,6 @@ raid5_write_default_writing(struct raid5_stripe_request *request) raid5_w_default_writing_cb, request); - SPDK_ERRLOG("req idx: %d\nreq ret: %d\niovs cnt: %d\niovs[0]: %llu\nofs: %llu\nnum_blocks: %llu\n", - idx, - ret, - request->strip_buffs_cnts[idx], - request->strip_buffs[idx][0].iov_base, - ofs_blcks, - num_blcks); - if (spdk_unlikely(ret != 0)) { if (spdk_unlikely(ret == -ENOMEM)) { raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), @@ -1451,8 +1385,6 @@ raid5_w_default_reading_complete_part(struct raid5_stripe_request *request, uint { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_w_default_reading_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1501,11 +1433,6 @@ raid5_w_default_reading_complete_part(struct raid5_stripe_request *request, uint raid_io->base_bdev_io_remaining = (((es_idx + raid_bdev->num_base_bdevs) - sts_idx) % raid_bdev->num_base_bdevs) + 2; - for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; ++i) { - uint64_t *b8 = request->strip_buffs[i][0].iov_base; - SPDK_ERRLOG("%d strip, first 8 byte: %llu\n", i, b8[0]); - } - raid5_write_default_writing(request); return true; @@ -1518,8 +1445,6 @@ static void raid5_w_default_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_w_default_reading_cb\n"); - spdk_bdev_free_io(bdev_io); 
raid5_w_default_reading_complete_part(request, 1, success ? @@ -1559,8 +1484,6 @@ raid5_write_default_reading(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_write_default_reading\n"); - if (sts_idx != es_idx) { num_strips_to_r = raid_bdev->num_base_bdevs - (((es_idx + raid_bdev->num_base_bdevs) - sts_idx) % raid_bdev->num_base_bdevs); @@ -1671,8 +1594,6 @@ raid5_w_broken_ps_complete_part(struct raid5_stripe_request *request, uint64_t c { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_w_broken_ps_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1694,8 +1615,6 @@ static void raid5_w_broken_ps_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_w_broken_ps_cb\n"); - spdk_bdev_free_io(bdev_io); raid5_w_broken_ps_complete_part(request, 1, success ? @@ -1730,8 +1649,6 @@ raid5_write_broken_parity_strip(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_write_broken_parity_strip\n"); - start_idx = (sts_idx + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; for (uint8_t idx = start_idx; idx != after_es_idx; idx = raid5_next_idx(idx, raid_bdev)) { @@ -1747,14 +1664,6 @@ raid5_write_broken_parity_strip(struct raid5_stripe_request *request) raid5_w_broken_ps_cb, request); - SPDK_ERRLOG("req idx: %d\nreq ret: %d\niovs cnt: %d\niovs[0]: %llu\nofs: %llu\nnum_blocks: %llu\n", - idx, - ret, - request->strip_buffs_cnts[idx], - request->strip_buffs[idx][0].iov_base, - ofs_blcks, - num_blcks); - if (spdk_unlikely(ret != 0)) { if (spdk_unlikely(ret == -ENOMEM)) { raid5_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), @@ -1779,8 +1688,6 @@ raid5_w_r_modify_w_writing_complete_part(struct raid5_stripe_request *request, u { struct raid_bdev_io *raid_io = request->raid_io; - 
SPDK_ERRLOG("raid5_w_r_modify_w_writing_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1802,8 +1709,6 @@ static void raid5_w_r_modify_w_writing_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_w_r_modify_w_writing_cb\n"); - spdk_bdev_free_io(bdev_io); raid5_w_r_modify_w_writing_complete_part(request, 1, success ? @@ -1839,8 +1744,6 @@ raid5_write_r_modify_w_writing(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_write_r_modify_w_writing\n"); - if (raid_io->base_bdev_io_submitted == 0) { base_info = &raid_bdev->base_bdev_info[ps_idx]; base_ch = raid_ch->base_channel[ps_idx]; @@ -1912,8 +1815,6 @@ raid5_w_r_modify_w_reading_complete_part(struct raid5_stripe_request *request, u { struct raid_bdev_io *raid_io = request->raid_io; - SPDK_ERRLOG("raid5_w_r_modify_w_reading_complete_part\n"); - assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -1984,8 +1885,6 @@ static void raid5_w_r_modify_w_reading_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid5_stripe_request *request = cb_arg; - SPDK_ERRLOG("raid5_w_r_modify_w_reading_cb\n"); - spdk_bdev_free_io(bdev_io); raid5_w_r_modify_w_reading_complete_part(request, 1, success ? 
@@ -2021,8 +1920,6 @@ raid5_write_r_modify_w_reading(struct raid5_stripe_request *request) uint8_t start_idx; int ret = 0; - SPDK_ERRLOG("raid5_write_r_modify_w_reading\n"); - if (raid_io->base_bdev_io_submitted == 0) { base_info = &raid_bdev->base_bdev_info[ps_idx]; base_ch = raid_ch->base_channel[ps_idx]; @@ -2654,7 +2551,7 @@ raid5_submit_write_request(struct raid5_stripe_request *request) } } } - + if (request->broken_strip_idx == raid_bdev->num_base_bdevs && r5_info->write_type == DEFAULT) { // default @@ -2707,7 +2604,7 @@ raid5_submit_write_request(struct raid5_stripe_request *request) raid5_write_r_modify_w_reading(request); } else { // broken req strip - + ret = raid5_write_broken_req_set_strip_buffs(request); if (spdk_unlikely(ret != 0)) { SPDK_ERRLOG("RAID5 write request: allocation of buffers is failed\n"); @@ -2739,36 +2636,6 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) struct raid5_info *r5_info = raid_bdev->module_private; struct raid5_stripe_request *request; - // debug logs - - SPDK_ERRLOG("iovcnt: %d\n", bdev_io->u.bdev.iovcnt); - - for (int i = 0; i < bdev_io->u.bdev.iovcnt; ++i) { - SPDK_ERRLOG("iovec #%d, addr:%llu, len: %lu\n", - i, - bdev_io->u.bdev.iovs[i].iov_base, - bdev_io->u.bdev.iovs[i].iov_len); - } - - uint64_t start_strip; - uint64_t end_strip; - uint64_t ststrip_idx = raid5_start_strip_idx(bdev_io, raid_bdev); - uint64_t estrip_idx = raid5_end_strip_idx(bdev_io, raid_bdev); - uint64_t after_estrip_idx = raid5_next_idx(estrip_idx, raid_bdev); - - start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> - raid_bdev->strip_size_shift; - - SPDK_ERRLOG("start_strip: %lu\nend_strip: %lu\nststrip_idx: %lu\nestrip_idx: %lu\nafter_estrip_idx: %lu\n", - start_strip, - end_strip, - ststrip_idx, - estrip_idx, - after_estrip_idx); - - // end debug logs - if (!raid5_check_io_boundaries(raid_io)) { SPDK_ERRLOG("RAID5: I/O spans stripe 
boundaries!\n"); raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); @@ -2808,7 +2675,7 @@ raid5_submit_rw_request(struct raid_bdev_io *raid_io) static bool raid5_wz_req_complete_part_final(struct raid_bdev_io *raid_io, uint64_t completed, - enum spdk_bdev_io_status status) + enum spdk_bdev_io_status status) { assert(raid_io->base_bdev_io_remaining >= completed); raid_io->base_bdev_io_remaining -= completed; @@ -2983,7 +2850,7 @@ raid5_calculate_blockcnt(struct raid_bdev *raid_bdev) stripe_blockcnt = raid_bdev->strip_size * (raid_bdev->num_base_bdevs - 1); SPDK_DEBUGLOG(bdev_raid5, "min blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n", - min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); + min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); return total_stripes * stripe_blockcnt; } @@ -3021,9 +2888,9 @@ raid5_resize(struct raid_bdev *raid_bdev) } SPDK_NOTICELOG("raid5 '%s': min blockcount was changed from %" PRIu64 " to %" PRIu64 "\n", - raid_bdev->bdev.name, - raid_bdev->bdev.blockcnt, - blockcnt); + raid_bdev->bdev.name, + raid_bdev->bdev.blockcnt, + blockcnt); rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt); if (rc != 0) { From 73652bf11352732538803af14d9898fa19dc9fb5 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 20 Feb 2024 02:23:03 +0000 Subject: [PATCH 69/71] feat: Change allocating function to use physical addresses. 
--- module/bdev/raid/raid5.c | 92 ++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 03df082dc5f..2e80455cb5e 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -48,15 +48,15 @@ raid5_get_buffer(size_t iovlen) { struct iovec *buffer; - buffer = calloc(1, sizeof(*buffer)); + buffer = spdk_dma_malloc(sizeof(*buffer), 0, NULL); if (buffer == NULL) { return NULL; } buffer->iov_len = iovlen; - buffer->iov_base = calloc(buffer->iov_len, sizeof(char)); + buffer->iov_base = spdk_dma_zmalloc(buffer->iov_len * sizeof(char), 0, NULL); if (buffer->iov_base == NULL) { - free(buffer); + spdk_dma_free(buffer); return NULL; } @@ -66,8 +66,8 @@ raid5_get_buffer(size_t iovlen) static inline void raid5_free_buffer(struct iovec *buffer) { - free(buffer->iov_base); - free(buffer); + spdk_dma_free(buffer->iov_base); + spdk_dma_free(buffer); } static inline void @@ -292,7 +292,7 @@ raid5_get_stripe_request(struct raid_bdev_io *raid_io) { struct raid5_stripe_request *request; - request = calloc(1, sizeof(struct raid5_stripe_request)); + request = spdk_dma_malloc(sizeof(struct raid5_stripe_request), 0, NULL); if (request == NULL) { return NULL; } @@ -300,16 +300,16 @@ raid5_get_stripe_request(struct raid_bdev_io *raid_io) request->raid_io = raid_io; request->strip_buffs_cnt = raid_io->raid_bdev->num_base_bdevs; request->broken_strip_idx = raid_io->raid_bdev->num_base_bdevs; - request->strip_buffs = calloc(request->strip_buffs_cnt, sizeof(struct iovec *)); + request->strip_buffs = spdk_dma_malloc(sizeof(struct iovec *) * request->strip_buffs_cnt, 0, NULL); if (request->strip_buffs == NULL) { - free(request); + spdk_dma_free(request); return NULL; } - request->strip_buffs_cnts = calloc(request->strip_buffs_cnt, sizeof(int)); + request->strip_buffs_cnts = spdk_dma_zmalloc(sizeof(int) * request->strip_buffs_cnt, 0, NULL); if (request->strip_buffs_cnts == NULL) { - 
free(request->strip_buffs); - free(request); + spdk_dma_free(request->strip_buffs); + spdk_dma_free(request); return NULL; } @@ -318,9 +318,9 @@ raid5_get_stripe_request(struct raid_bdev_io *raid_io) static void raid5_free_stripe_request(struct raid5_stripe_request *request) { - free(request->strip_buffs_cnts); - free(request->strip_buffs); - free(request); + spdk_dma_free(request->strip_buffs_cnts); + spdk_dma_free(request->strip_buffs); + spdk_dma_free(request); } static int @@ -384,10 +384,10 @@ raid5_set_req_strips_iovs_until(struct raid5_stripe_request *request, len = num_blcks * block_size_b; request->strip_buffs_cnts[idx] = end_iov_idx - *iov_idx + 1; - request->strip_buffs[idx] = calloc(request->strip_buffs_cnts[idx], sizeof(struct iovec)); + request->strip_buffs[idx] = spdk_dma_malloc(sizeof(struct iovec) * request->strip_buffs_cnts[idx], 0, NULL); if (request->strip_buffs[idx] == NULL) { for (uint8_t i = start_idx; i != idx; i = raid5_next_idx(i, raid_bdev)) { - free(request->strip_buffs[i]); + spdk_dma_free(request->strip_buffs[i]); request->strip_buffs_cnts[i] = 0; } request->strip_buffs_cnts[idx] = 0; @@ -433,7 +433,7 @@ raid5_free_req_strips_iovs_until(struct raid5_stripe_request *request, struct raid_bdev *raid_bdev = request->raid_io->raid_bdev; for (uint8_t idx = start_idx; idx != until_idx; idx = raid5_next_idx(idx, raid_bdev)) { - free(request->strip_buffs[idx]); + spdk_dma_free(request->strip_buffs[idx]); request->strip_buffs[idx] = NULL; request->strip_buffs_cnts[idx] = 0; } @@ -503,7 +503,7 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc } request->strip_buffs_cnts[sts_idx] = end_iov_idx - iov_idx + 1 + sts_idx_ofs; - request->strip_buffs[sts_idx] = calloc(request->strip_buffs_cnts[sts_idx], sizeof(struct iovec)); + request->strip_buffs[sts_idx] = spdk_dma_malloc(sizeof(struct iovec) * request->strip_buffs_cnts[sts_idx], 0, NULL); if (request->strip_buffs[sts_idx] == NULL) { 
raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); request->strip_buffs_cnts[sts_idx] = 0; @@ -545,11 +545,11 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc if (sts_idx_ofs == 1) { request->strip_buffs[sts_idx][0].iov_len = (raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx) - ofs_blcks) * block_size_b; - request->strip_buffs[sts_idx][0].iov_base = calloc(request->strip_buffs[sts_idx][0].iov_len, - sizeof(char)); + request->strip_buffs[sts_idx][0].iov_base = spdk_dma_zmalloc(sizeof(char) * + request->strip_buffs[sts_idx][0].iov_len, 0, NULL); if (request->strip_buffs[sts_idx][0].iov_base == NULL) { raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); - free(request->strip_buffs[sts_idx]); + spdk_dma_free(request->strip_buffs[sts_idx]); request->strip_buffs[sts_idx] = NULL; request->strip_buffs_cnts[sts_idx] = 0; return -ENOMEM; @@ -567,9 +567,9 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc if (ret != 0) { raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); + spdk_dma_free(request->strip_buffs[sts_idx][0].iov_base); } - free(request->strip_buffs[sts_idx]); + spdk_dma_free(request->strip_buffs[sts_idx]); request->strip_buffs[sts_idx] = NULL; request->strip_buffs_cnts[sts_idx] = 0; @@ -592,13 +592,13 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc } request->strip_buffs_cnts[es_idx] = end_iov_idx - iov_idx + 1 + es_idx_extra; - request->strip_buffs[es_idx] = calloc(request->strip_buffs_cnts[es_idx], sizeof(struct iovec)); + request->strip_buffs[es_idx] = spdk_dma_malloc(sizeof(struct iovec) * request->strip_buffs_cnts[es_idx], 0, NULL); if (request->strip_buffs[es_idx] == NULL) { raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); + 
spdk_dma_free(request->strip_buffs[sts_idx][0].iov_base); } - free(request->strip_buffs[sts_idx]); + spdk_dma_free(request->strip_buffs[sts_idx]); request->strip_buffs[sts_idx] = NULL; request->strip_buffs_cnts[sts_idx] = 0; raid5_free_req_strips_iovs_until(request, @@ -645,21 +645,21 @@ raid5_set_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_blc raid5_num_blcks(bdev_io, raid_bdev, es_idx))) * block_size_b; request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base = - calloc(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - - 1].iov_len, - sizeof(char)); + spdk_dma_zmalloc(sizeof(char) * + request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + - 1].iov_len, 0 , NULL); if (request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base == NULL) { raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); if (sts_idx_ofs == 1) { - free(request->strip_buffs[sts_idx][0].iov_base); + spdk_dma_free(request->strip_buffs[sts_idx][0].iov_base); } - free(request->strip_buffs[sts_idx]); + spdk_dma_free(request->strip_buffs[sts_idx]); request->strip_buffs[sts_idx] = NULL; request->strip_buffs_cnts[sts_idx] = 0; raid5_free_req_strips_iovs_until(request, raid5_next_idx(sts_idx, raid_bdev), es_idx); - free(request->strip_buffs[es_idx]); + spdk_dma_free(request->strip_buffs[es_idx]); request->strip_buffs[es_idx] = NULL; request->strip_buffs_cnts[es_idx] = 0; return -ENOMEM; @@ -680,9 +680,9 @@ raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_bl raid5_free_strips_buffs_until(request, after_es_idx, sts_idx); if (ofs_blcks != raid5_ofs_blcks(bdev_io, raid_bdev, sts_idx)) { - free(request->strip_buffs[sts_idx][0].iov_base); + spdk_dma_free(request->strip_buffs[sts_idx][0].iov_base); } - free(request->strip_buffs[sts_idx]); + spdk_dma_free(request->strip_buffs[sts_idx]); request->strip_buffs[sts_idx] = NULL; request->strip_buffs_cnts[sts_idx] = 0; if (sts_idx == es_idx) { @@ 
-694,10 +694,10 @@ raid5_free_all_strip_buffs(struct raid5_stripe_request *request, uint64_t ofs_bl if (ofs_blcks + num_blcks > raid5_ofs_blcks(bdev_io, raid_bdev, es_idx) + raid5_num_blcks(bdev_io, raid_bdev, es_idx)) { - free(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] + spdk_dma_free(request->strip_buffs[es_idx][request->strip_buffs_cnts[es_idx] - 1].iov_base); } - free(request->strip_buffs[es_idx]); + spdk_dma_free(request->strip_buffs[es_idx]); request->strip_buffs[es_idx] = NULL; request->strip_buffs_cnts[es_idx] = 0; } @@ -2696,8 +2696,8 @@ raid5_wz_req_complete_part_final(struct raid_bdev_io *raid_io, uint64_t complete } raid_bdev_destroy_cb(raid_io->raid_bdev, raid_io->raid_ch); - free(raid_io->raid_ch); - free(raid_io); + spdk_dma_free(raid_io->raid_ch); + spdk_dma_free(raid_io); return true; } else { return false; @@ -2799,7 +2799,7 @@ raid5_set_write_type(struct raid_bdev *raid_bdev) } } - raid_io = calloc(1, sizeof(struct raid_bdev_io)); + raid_io = spdk_dma_malloc(sizeof(struct raid_bdev_io), 0, NULL); if (raid_io == NULL) { r5_info->write_type = DEFAULT; return; @@ -2809,17 +2809,17 @@ raid5_set_write_type(struct raid_bdev *raid_bdev) raid_io->base_bdev_io_remaining = 0; raid_io->base_bdev_io_submitted = 0; raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; - raid_io->raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel)); + raid_io->raid_ch = spdk_dma_malloc(sizeof(struct raid_bdev_io_channel), 0, NULL); if (raid_io->raid_ch == NULL) { - free(raid_io); + spdk_dma_free(raid_io); r5_info->write_type = DEFAULT; return; } ret = raid_bdev_create_cb(raid_bdev, raid_io->raid_ch); if (ret != 0) { - free(raid_io->raid_ch); - free(raid_io); + spdk_dma_free(raid_io->raid_ch); + spdk_dma_free(raid_io); r5_info->write_type = DEFAULT; return; } @@ -2827,8 +2827,8 @@ raid5_set_write_type(struct raid_bdev *raid_bdev) ret = raid5_submit_write_zeroes_request(raid_io); if (spdk_unlikely(ret != 0)) { raid_bdev_destroy_cb(raid_bdev, 
raid_io->raid_ch); - free(raid_io->raid_ch); - free(raid_io); + spdk_dma_free(raid_io->raid_ch); + spdk_dma_free(raid_io); r5_info->write_type = DEFAULT; return; } @@ -2866,7 +2866,7 @@ raid5_start(struct raid_bdev *raid_bdev) raid_bdev->bdev.split_on_optimal_io_boundary = true; raid_bdev->min_base_bdevs_operational = raid_bdev->num_base_bdevs - 1; - r5_info = calloc(1, (sizeof(struct raid5_info))); + r5_info = spdk_dma_malloc((sizeof(struct raid5_info)), 0, NULL); assert(r5_info != NULL); raid_bdev->module_private = r5_info; From 77416518a57f215fe32a46d57ba347ab3296cb5d Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 20 Feb 2024 02:24:18 +0000 Subject: [PATCH 70/71] feat: Add raid_stop fn to free raid5 info struct memory. --- module/bdev/raid/raid5.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c index 2e80455cb5e..ec106f0e3cf 100644 --- a/module/bdev/raid/raid5.c +++ b/module/bdev/raid/raid5.c @@ -2898,11 +2898,18 @@ raid5_resize(struct raid_bdev *raid_bdev) } } +static bool +raid5_stop(struct raid_bdev *raid_bdev) { + spdk_dma_free(raid_bdev->module_private); + return true; +} + static struct raid_bdev_module g_raid5_module = { .level = RAID5, .base_bdevs_min = 3, .memory_domains_supported = false, .start = raid5_start, + .stop = raid5_stop, .submit_rw_request = raid5_submit_rw_request, .resize = raid5_resize }; From 1024db1f9cbe7ff7ca0a62e9d2d7d8b81bd62242 Mon Sep 17 00:00:00 2001 From: Arsene-Baitenov Date: Tue, 20 Feb 2024 03:01:51 +0000 Subject: [PATCH 71/71] feat: Remove static modifier of raid_bdev_create_cb and raid_bdev_destroy_cb fn for zero initialization. 
--- module/bdev/raid/bdev_raid.c | 4 ++-- module/bdev/raid/bdev_raid.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index 5c14f357016..7368dde1669 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -62,7 +62,7 @@ static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, * 0 - success * non zero - failure */ -static int +int raid_bdev_create_cb(void *io_device, void *ctx_buf) { struct raid_bdev *raid_bdev = io_device; @@ -134,7 +134,7 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) * returns: * none */ -static void +void raid_bdev_destroy_cb(void *io_device, void *ctx_buf) { struct raid_bdev_io_channel *raid_ch = ctx_buf; diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index 708076c1895..ca027f203fa 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -270,6 +270,9 @@ __RAID_MODULE_REGISTER(__LINE__)(void) \ raid_bdev_module_list_add(_module); \ } +int raid_bdev_create_cb(void *io_device, void *ctx_buf); +void raid_bdev_destroy_cb(void *io_device, void *ctx_buf); + bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, enum spdk_bdev_io_status status); void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,