diff --git a/include/spdk/util.h b/include/spdk/util.h index 34183ffe8ad..96f5ce1025a 100644 --- a/include/spdk/util.h +++ b/include/spdk/util.h @@ -42,6 +42,18 @@ extern "C" { /* Ceiling division of unsigned integers */ #define SPDK_CEIL_DIV(x,y) (((x)+(y)-1)/(y)) +/* Non-zero when bit shift_size of *number_ptr is set (yields the masked value, not 0/1) */ +#define SPDK_TEST_BIT(number_ptr, shift_size) (*(number_ptr) & (1UL << (shift_size))) + +/* Set bit shift_size of *number_ptr; plain read-modify-write, not atomic */ +#define SPDK_SET_BIT(number_ptr, shift_size) (*(number_ptr) |= (1UL << (shift_size))) + +/* Clear bit shift_size of *number_ptr; plain read-modify-write, not atomic */ +#define SPDK_REMOVE_BIT(number_ptr, shift_size) (*(number_ptr) &= ~(1UL << (shift_size))) + +/* Convert a size in KiB to bytes */ +#define SPDK_KB_TO_B(number) ((number) << 10) + /** * Macro to align a value to a given power-of-two. The resultant value * will be of the same type as the first parameter, and will be no diff --git a/local-test-0-verify.state b/local-test-0-verify.state new file mode 100644 index 00000000000..b9e1fa0c0f4 Binary files /dev/null and b/local-test-0-verify.state differ diff --git a/module/bdev/raid/Makefile b/module/bdev/raid/Makefile index 17ad7f41938..f583d514bdd 100644 --- a/module/bdev/raid/Makefile +++ b/module/bdev/raid/Makefile @@ -6,11 +6,14 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 5 +SO_VER := 6 SO_MINOR := 0 - -CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ -C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c raid1.c concat.c +# -> +# CFLAGS += $(ENV_CFLAGS) +# <- +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ +# -I$(SPDK_ROOT_DIR)/lib/env_dpdk/ +C_SRCS = service.c bdev_raid.c bdev_raid_rpc.c raid0.c raid1.c concat.c ifeq ($(CONFIG_RAID5F),y) C_SRCS += raid5f.c @@ -21,3 +24,24 @@ LIBNAME = bdev_raid SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk + +# -> +# LIBDPDK_PKGCONFIG = $(call pkgconfig_filename,spdk_dpdklibs) + +# $(LIBDPDK_PKGCONFIG): $(PKGCONFIG) $(PKGCONFIG_INST) +# $(Q)$(SPDK_ROOT_DIR)/scripts/pc_libs.sh \ +# "-L$(DPDK_LIB_DIR) $(DPDK_LIB_LIST:%=-l%)" "" DPDK spdk_dpdklibs > $@ +# $(Q)sed -i.bak '5s,.*,Requires: $(DEPDIRS-$(LIBNAME):%=spdk_%) spdk_dpdklibs,' $(PKGCONFIG) ; rm $(PKGCONFIG).bak +# $(Q)sed -i.bak '5s,.*,Requires: $(DEPDIRS-$(LIBNAME):%=spdk_%) spdk_dpdklibs,' $(PKGCONFIG_INST) ; rm $(PKGCONFIG_INST).bak + +# _install_dpdklibs: $(LIBDPDK_PKGCONFIG) +# @$(call pkgconfig_install,$(LIBDPDK_PKGCONFIG)) + +# _uninstall_dpdklibs: $(LIBDPDK_PKGCONFIG) +# @$(call pkgconfig_uninstall,$(LIBDPDK_PKGCONFIG)) + +# all: $(LIBDPDK_PKGCONFIG) +# install: _install_dpdklibs +# uninstall: _uninstall_dpdklibs +# <- + diff --git a/module/bdev/raid/atomic_raid.h b/module/bdev/raid/atomic_raid.h new file mode 100644 index 00000000000..fe818aac676 --- /dev/null +++ b/module/bdev/raid/atomic_raid.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2018 Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef SPDK_ATOMIC_RAID_INTERNAL_H +#define SPDK_ATOMIC_RAID_INTERNAL_H + +#include "spdk/util.h" + +//typedef int raid_atomic; // can be implemented later, but I see no point for now + +typedef uint64_t raid_atomic64; + +#define atomic_read(ptr) (*(volatile __typeof__(*(ptr)) *) (ptr)) /* volatile must qualify the pointee so the load is really performed */ +#define atomic_set(ptr, i) ((*(volatile __typeof__(*(ptr)) *) (ptr)) = (i)) /* volatile store through the pointee */ +#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) +#define atomic_dec(ptr) ((void) __sync_fetch_and_sub(ptr, 1)) +#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) +#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) + +#define atomic_cmpxchg __sync_bool_compare_and_swap /* evaluates to true when the swap was performed */ + + +static inline uint64_t +raid_atomic64_read(const raid_atomic64 *a) +{ + return atomic_read(a); +} + +static inline void +raid_atomic64_set(raid_atomic64 *a, uint64_t i) +{ + atomic_set(a, i); +} + +static inline void +raid_atomic64_add(uint64_t i, raid_atomic64 *a) +{ + atomic_add(a, i); +} + +static inline void +raid_atomic64_sub(uint64_t i, raid_atomic64 *a) +{ + atomic_sub(a, i); +} + +static inline void +raid_atomic64_inc(raid_atomic64 *a) +{ + atomic_inc(a); +} + +static inline void +raid_atomic64_dec(raid_atomic64 *a) +{ + atomic_dec(a); +} + +static inline uint64_t +raid_atomic64_add_return(uint64_t i, raid_atomic64 *a) +{ + return __sync_add_and_fetch(a, i); +} + +static inline uint64_t +raid_atomic64_sub_return(uint64_t i, raid_atomic64 *a) +{ + return __sync_sub_and_fetch(a, i); +} + +static inline uint64_t +raid_atomic64_inc_return(raid_atomic64 *a) +{ + return raid_atomic64_add_return(1, a); +} + +static inline uint64_t +raid_atomic64_dec_return(raid_atomic64 *a) +{ + return raid_atomic64_sub_return(1, a); +} + +static inline uint64_t +raid_atomic64_cmpxchg(raid_atomic64 *a, uint64_t old_val, uint64_t new_val) +{ + return atomic_cmpxchg(a, old_val, new_val); +} + +static inline void +raid_atomic64_set_bit(raid_atomic64 *atomic_ptr, uint64_t shift_size) +{ + uint64_t old_val; + uint64_t 
new_val; + do + { + old_val = raid_atomic64_read(atomic_ptr); + new_val = old_val; + SPDK_SET_BIT(&new_val, shift_size); + } while (!raid_atomic64_cmpxchg(atomic_ptr, old_val, new_val)); /* cmpxchg is non-zero on success: retry only if the value changed under us */ +} + +static inline void +raid_atomic64_remove_bit(raid_atomic64 *atomic_ptr, uint64_t shift_size) +{ + uint64_t old_val; + uint64_t new_val; + do + { + old_val = raid_atomic64_read(atomic_ptr); + new_val = old_val; + SPDK_REMOVE_BIT(&new_val, shift_size); + } while (!raid_atomic64_cmpxchg(atomic_ptr, old_val, new_val)); /* cmpxchg is non-zero on success: retry only if the value changed under us */ +} + +#endif /* SPDK_ATOMIC_RAID_INTERNAL_H */ \ No newline at end of file diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index d1c8ea8e240..9c4999db19e 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -5,6 +5,7 @@ */ #include "bdev_raid.h" +#include "service.h" #include "spdk/env.h" #include "spdk/thread.h" #include "spdk/log.h" @@ -185,6 +186,7 @@ raid_bdev_cleanup(struct raid_bdev *raid_bdev) TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); free(raid_bdev->base_bdev_info); + spdk_poller_unregister(&(raid_bdev->rebuild_poller)); } static void @@ -951,12 +953,7 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, return -EEXIST; } - if (level == RAID1) { - if (strip_size != 0) { - SPDK_ERRLOG("Strip size is not supported by raid1\n"); - return -EINVAL; - } - } else if (spdk_u32_is_pow2(strip_size) == false) { + if (spdk_u32_is_pow2(strip_size) == false) { SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); return -EINVAL; } @@ -1009,6 +1006,20 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, return -ENOMEM; } + /* allocate rebuild struct */ + switch(level) { + case RAID1: + raid_bdev->rebuild = calloc(1, sizeof(struct raid_rebuild)); + if (!raid_bdev->rebuild) { + SPDK_ERRLOG("Unable to allocate memory for raid rebuild struct\n"); + return -ENOMEM; + } + raid_bdev->rebuild_poller = SPDK_POLLER_REGISTER(run_rebuild_poller, 
raid_bdev, 200000); + break; + default: + raid_bdev->rebuild = NULL; + raid_bdev->rebuild_poller = NULL; + } raid_bdev->module = module; raid_bdev->num_base_bdevs = num_base_bdevs; raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, @@ -1539,19 +1550,28 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); - assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); - base_info->desc = desc; - base_info->blockcnt = bdev->blockcnt; raid_bdev->num_base_bdevs_discovered++; assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); - if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { - rc = raid_bdev_configure(raid_bdev); - if (rc != 0) { - SPDK_ERRLOG("Failed to configure raid bdev\n"); - return rc; - } + switch (raid_bdev->state) { + case RAID_BDEV_STATE_ONLINE: + bdev->blockcnt = base_info->blockcnt; + break; + case RAID_BDEV_STATE_CONFIGURING: + base_info->blockcnt = bdev->blockcnt; + if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { + rc = raid_bdev_configure(raid_bdev); + if (rc != 0) { + SPDK_ERRLOG("Failed to configure raid bdev\n"); + return rc; + } + } + break; + case RAID_BDEV_STATE_OFFLINE: + /* TODO when OFFLINE state is completely implemented */ + default: + SPDK_ERRLOG("unexpected bdev raid state when adding '%s' base bdev\n", base_info->name); } return 0; @@ -1604,6 +1624,42 @@ raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t return 0; } +static int +fill_matrix(void) { /* stub: only logs and reports success */ + SPDK_DEBUGLOG(bdev_raid, "Fill matrix's stub\n"); + return 0; +} + +int +raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, char *base_bdev_name, uint8_t slot) { + int rc; + struct spdk_bdev *bdev = spdk_bdev_get_by_name(base_bdev_name); + + if (bdev == NULL) { + SPDK_ERRLOG("Currently unable to find bdev with name: %s\n", base_bdev_name); + return -ENXIO; + } + + if (bdev->blocklen != raid_bdev->bdev.blocklen) { + 
SPDK_ERRLOG("Blocklen of the bdev %s not matching with other base bdevs\n", base_bdev_name); + return -EINVAL; + } + + if (bdev->blockcnt < raid_bdev->bdev.blockcnt) { + SPDK_ERRLOG("The bdev %s size is too small\n", base_bdev_name); + return -EINVAL; + } + + rc = raid_bdev_add_base_device(raid_bdev, base_bdev_name, slot); + if (rc) + return rc; + + rc = fill_matrix(); + if (rc) + SPDK_ERRLOG("Failed to copy data to adding base bdev\n"); + return rc; +} + /* * brief: * raid_bdev_examine function is the examine function call by the below layers diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index c6e31ea2ca2..7577bca80a4 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -8,6 +8,38 @@ #include "spdk/bdev_module.h" #include "spdk/uuid.h" +#include "atomic_raid.h" + +#define MATRIX_REBUILD_SIZE 32768 /* should be < sizeof(int64_t) and power of 2 */ +#define ATOMIC_TYPE raid_atomic64 +#define ATOMIC_DATA(name) ATOMIC_TYPE name +#define ATOMIC_SNAPSHOT_TYPE uint64_t /* atomic type can be converted to the type */ +#define ATOMIC_SNAPSHOT(name) ATOMIC_SNAPSHOT_TYPE name +#define LEN_AREA_STR_IN_BIT (sizeof(ATOMIC_SNAPSHOT_TYPE) * 8) /* bits in one rebuild_matrix entry */ + +#define BASE_BDEVS_MAX_NUM 64 + +#include "service.h" + +enum rebuild_flag { + /* rebuild flag set during initialization */ + REBUILD_FLAG_INIT_CONFIGURATION = 0, + + /* if there is at least one broken area in rbm(rebuild_matrix) */ + REBUILD_FLAG_NEED_REBUILD = 1, + + /* if service start rebuild cycle */ + REBUILD_FLAG_IN_PROGRESS = 2, + + /* if service finished the last iteration of the rebuild cycle */ + REBUILD_FLAG_FINISH = 3, + + /* fatal error during rebuild cycle */ + REBUILD_FLAG_FATAL_ERROR = 59, + + /* show that rebuild struct is initialized */ + REBUILD_FLAG_INITIALIZED = 60, +}; enum raid_level { INVALID_RAID_LEVEL = -1, @@ -43,6 +75,29 @@ enum raid_bdev_state { typedef void (*raid_bdev_remove_base_bdev_cb)(void *ctx, int status); +/* + * raid_rebuild assists in the raid bdev rebuild process. 
+ */ +struct raid_rebuild { + /* stores data on broken memory areas */ + ATOMIC_DATA(rebuild_matrix[MATRIX_REBUILD_SIZE]); + + /* number of memory areas */ + uint64_t num_memory_areas; + + /* strip count in one area */ + uint64_t strips_per_area; + + /* rebuild flag */ + ATOMIC_DATA(rebuild_flag); + + /* + * structure describing a specific rebuild + * (i.e. when cycle_progress == NULL) + */ + struct rebuild_progress *cycle_progress; +}; + /* * raid_base_bdev_info contains information for the base bdevs which are part of some * raid. This structure contains the per base bdev information. Whatever is @@ -143,6 +198,12 @@ struct raid_bdev { /* Raid Level of this raid bdev */ enum raid_level level; + /* RAID rebuild struct */ + struct raid_rebuild *rebuild; + + /* Poller responsible for processing rebuild */ + struct spdk_poller *rebuild_poller; + /* Set to true if destroy of this raid bdev is started. */ bool destroy_started; @@ -190,6 +251,7 @@ const char *raid_bdev_state_to_str(enum raid_bdev_state state); void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w); int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, void *cb_ctx); +int raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, char *base_bdev_name, uint8_t slot); /* * RAID module descriptor @@ -254,6 +316,12 @@ struct raid_bdev_module { */ void (*resize)(struct raid_bdev *raid_bdev); + /* + * Called to submit rebuild request + * If implemented. 
+ */ + int (*rebuild_request)(struct raid_bdev *raid_bdev, struct rebuild_progress *cycle_progress, spdk_bdev_io_completion_cb cb); + TAILQ_ENTRY(raid_bdev_module) link; }; diff --git a/module/bdev/raid/bdev_raid_rpc.c b/module/bdev/raid/bdev_raid_rpc.c index cd11dc329f3..192d9e13fb8 100644 --- a/module/bdev/raid/bdev_raid_rpc.c +++ b/module/bdev/raid/bdev_raid_rpc.c @@ -451,3 +451,77 @@ rpc_bdev_raid_remove_base_bdev(struct spdk_jsonrpc_request *request, rpc_bdev_raid_remove_base_bdev_done(request, rc); } SPDK_RPC_REGISTER("bdev_raid_remove_base_bdev", rpc_bdev_raid_remove_base_bdev, SPDK_RPC_RUNTIME) + +struct rpc_bdev_raid_add_base_bdev { + char *raid_name; + char *base_bdev_name; +}; + +static void +free_rpc_bdev_raid_add_base_bdev(struct rpc_bdev_raid_add_base_bdev *req) { + free(req->raid_name); + free(req->base_bdev_name); +} + +static const struct spdk_json_object_decoder rpc_bdev_raid_add_base_bdev_decoders[] = { + {"raid_name", offsetof(struct rpc_bdev_raid_add_base_bdev, raid_name), spdk_json_decode_string}, + {"base_bdev_name", offsetof(struct rpc_bdev_raid_add_base_bdev, base_bdev_name), spdk_json_decode_string}, +}; + +/* + * brief: + * bdev_raid_add_base_bdev function is the RPC for adding base bdev to a raid bdev. + * It takes base bdev name and raid name as input. 
+ * params: + * request - pointer to json rpc request + * params - pointer to request parameters + * returns: + * none + */ +static void +rpc_bdev_raid_add_base_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_raid_add_base_bdev req = {}; + struct raid_bdev *raid_bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_bdev_raid_add_base_bdev_decoders, + SPDK_COUNTOF(rpc_bdev_raid_add_base_bdev_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_PARSE_ERROR, + "spdk_json_decode_object failed"); + goto cleanup; + } + + raid_bdev = raid_bdev_find_by_name(req.raid_name); + if (raid_bdev == NULL) { + spdk_jsonrpc_send_error_response_fmt(request, -ENODEV, + "raid bdev %s not found", req.raid_name); + goto cleanup; + } + + for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + if (raid_bdev->base_bdev_info[i].name != NULL) + continue; + rc = raid_bdev_add_base_bdev(raid_bdev, req.base_bdev_name, i); + if (rc == -ENODEV) { + SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n", req.base_bdev_name); + } else if (rc != 0) { + spdk_jsonrpc_send_error_response_fmt(request, rc, + "Failed to add base bdev %s to RAID bdev %s: %s", + req.base_bdev_name, req.raid_name, + spdk_strerror(-rc)); + goto cleanup; + } else { + SPDK_DEBUGLOG(bdev_raid, "base bdev %s added to raid bdev %s\n", req.base_bdev_name, req.raid_name); + } + break; + } + + spdk_jsonrpc_send_bool_response(request, true); + +cleanup: + free_rpc_bdev_raid_add_base_bdev(&req); +} +SPDK_RPC_REGISTER("bdev_raid_add_base_bdev", rpc_bdev_raid_add_base_bdev, SPDK_RPC_RUNTIME) diff --git a/module/bdev/raid/raid1.c b/module/bdev/raid/raid1.c index 74506503060..00fdf59d2f1 100644 --- a/module/bdev/raid/raid1.c +++ b/module/bdev/raid/raid1.c @@ -2,21 +2,119 @@ * Copyright (C) 2022 Intel Corporation. * All rights reserved. 
*/ - #include "bdev_raid.h" +#include "service.h" #include "spdk/likely.h" #include "spdk/log.h" +#include "spdk/util.h" struct raid1_info { /* The parent raid bdev */ struct raid_bdev *raid_bdev; }; +#ifdef SERVICE_DEBUG + +#define PRINT_rebuild_first_stage_cb(fs) SPDK_ERRLOG("\ +\nfirst_stage_cb: \ +\n buf_idx=%u \ +\n pd_lba=%lu \ +\n pd_blocks=%lu \n", (fs)->buf_idx, (fs)->pd_lba, (fs)->pd_blocks) + +#endif + +struct rebuild_first_stage_cb +{ + uint8_t buf_idx; + uint64_t pd_lba; + uint64_t pd_blocks; + struct rebuild_progress *cycle_progress; + struct raid_bdev *raid_bdev; + spdk_bdev_io_completion_cb cb; +}; + + +/* Find the bdev index of the current IO request: returns 0 and fills *bdev_idx, or -ENODEV when no base bdev name matches */ +static int +get_current_bdev_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev_io *raid_io, uint32_t *bdev_idx) +{ + for (uint8_t i = 0; i < raid_io->raid_bdev->num_base_bdevs; i++) { + if (raid_io->raid_bdev->base_bdev_info[i].name != NULL && strcmp(raid_io->raid_bdev->base_bdev_info[i].name, bdev_io->bdev->name) == 0) { /* match by content, not pointer identity; empty slots have a NULL name */ + *bdev_idx = i; + return 0; + } + } + return -ENODEV; +} + +/* Allows to define the memory_rebuild_areas that are involved in current IO request */ +static void +get_io_area_range(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev, uint64_t *offset, + uint64_t *num) +{ + /* blocks */ + uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks; + uint64_t num_blocks = bdev_io->u.bdev.num_blocks; + + /* blocks -> strips */ + uint64_t offset_strips = (offset_blocks) / raid_bdev->strip_size; + uint64_t num_strips = SPDK_CEIL_DIV(offset_blocks + num_blocks, + raid_bdev->strip_size) - offset_strips; + + /* strips -> areas */ + uint64_t strips_per_area = raid_bdev->rebuild->strips_per_area; + + uint64_t offset_areas = offset_strips / strips_per_area; + uint64_t num_areas = SPDK_CEIL_DIV(offset_strips + num_strips, strips_per_area) - offset_areas; + + *offset = offset_areas; + *num = num_areas; +} + +/* Write a broken block to the rebuild_matrix */ +static void +write_in_rbm_broken_block(struct spdk_bdev_io *bdev_io, struct raid_bdev_io 
*raid_io, + uint32_t bdev_idx) +{ + uint64_t offset_areas = 0; + uint64_t num_areas = 0; + + get_io_area_range(bdev_io, raid_io->raid_bdev, &offset_areas, &num_areas); + for (uint64_t i = offset_areas; i < offset_areas + num_areas; i++) { + uint64_t *area = &raid_io->raid_bdev->rebuild->rebuild_matrix[i]; + SPDK_SET_BIT(area, bdev_idx); + } +} + +/* Determine if a device needs a rebuild or not */ +static void +get_bdev_rebuild_status(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io, uint8_t bdev_idx) +{ + uint64_t offset_areas = 0; + uint64_t num_areas = 0; + + get_io_area_range(bdev_io, raid_bdev, &offset_areas, &num_areas); + for (uint64_t i = offset_areas; i < offset_areas + num_areas; i++) { + uint64_t area = raid_bdev->rebuild->rebuild_matrix[i]; + if (SPDK_TEST_BIT(&area, bdev_idx)) { + SPDK_SET_BIT(&(raid_bdev->rebuild->rebuild_flag), REBUILD_FLAG_NEED_REBUILD); + } + } +} + static void raid1_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct raid_bdev_io *raid_io = cb_arg; + uint32_t bdev_idx = 0; + + get_current_bdev_idx(bdev_io, raid_io, &bdev_idx); + + + if (!success) { + write_in_rbm_broken_block(bdev_io, raid_io, bdev_idx); + } spdk_bdev_free_io(bdev_io); @@ -60,8 +158,21 @@ raid1_submit_read_request(struct raid_bdev_io *raid_io) RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { base_ch = raid_io->raid_ch->base_channel[idx]; if (base_ch != NULL) { + get_bdev_rebuild_status(raid_bdev, bdev_io, idx); + if (raid_bdev->rebuild->rebuild_flag != REBUILD_FLAG_INIT_CONFIGURATION) { + break; + } + base_ch = NULL; + } + +// TODO REMOVE: + if (base_info->desc != NULL){ + base_ch = spdk_bdev_get_io_channel(base_info->desc); + raid_io->raid_ch->base_channel[idx] = base_ch; break; } +// ------------ + idx++; } @@ -118,8 +229,10 @@ raid1_submit_write_request(struct raid_bdev_io *raid_io) base_ch = raid_io->raid_ch->base_channel[idx]; if (base_ch == NULL) { - /* skip a missing base bdev's slot */ 
raid_io->base_bdev_io_submitted++; + + write_in_rbm_broken_block(bdev_io, raid_io, idx); + raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); continue; } @@ -175,6 +288,27 @@ raid1_submit_rw_request(struct raid_bdev_io *raid_io) } } +static void +init_rebuild(struct raid_bdev *raid_bdev) +{ + uint64_t stripcnt = SPDK_CEIL_DIV(raid_bdev->bdev.blockcnt, raid_bdev->strip_size); + raid_bdev->rebuild->strips_per_area = SPDK_CEIL_DIV(stripcnt, MATRIX_REBUILD_SIZE); + raid_bdev->rebuild->num_memory_areas = stripcnt / raid_bdev->rebuild->strips_per_area; + raid_bdev->rebuild->rebuild_flag = REBUILD_FLAG_INIT_CONFIGURATION; + SPDK_SET_BIT(&(raid_bdev->rebuild->rebuild_flag), REBUILD_FLAG_INITIALIZED); +} + +static void +destruct_rebuild(struct raid_bdev *raid_bdev) +{ + struct raid_rebuild *r1rebuild = raid_bdev->rebuild; + + if (r1rebuild != NULL) { + free(r1rebuild); + raid_bdev->rebuild = NULL; + } +} + static int raid1_start(struct raid_bdev *raid_bdev) { @@ -196,6 +330,8 @@ raid1_start(struct raid_bdev *raid_bdev) raid_bdev->bdev.blockcnt = min_blockcnt; raid_bdev->module_private = r1info; + init_rebuild(raid_bdev); + return 0; } @@ -206,17 +342,143 @@ raid1_stop(struct raid_bdev *raid_bdev) free(r1info); + destruct_rebuild(raid_bdev); + return true; } +static +void raid1_submit_rebuild_second_stage(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct rebuild_first_stage_cb *info = cb_arg; + struct raid_bdev *raid_bdev = info->raid_bdev; + struct rebuild_cycle_iteration *cycle_iteration = &(raid_bdev->rebuild->cycle_progress->cycle_iteration); + struct raid_base_bdev_info *base_info; + struct spdk_bdev_desc *desc; /* __base_desc_from_raid_bdev(raid_bdev, idx); */ + struct spdk_io_channel *ch; /* spdk_bdev_get_io_channel(desc); */ + struct iteration_step *cb_arg_new = NULL; + uint8_t idx = 0; + int ret = 0; + + if (!success) + { + //TODO: Handle this case (mb add new flag FIRST_STAGE_ERROR) + SPDK_WARNLOG("Problem before firs rebuild 
stage RAID1\n"); + free(info); spdk_bdev_free_io(bdev_io); return; /* release the context and the completed read I/O on the failure path too */ + } + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + desc = base_info->desc; + + if (!SPDK_TEST_BIT(&(cycle_iteration->br_area_cnt), idx)){ + idx++; + continue; + } + + if (desc == NULL) { + extern_continue_rebuild(cycle_iteration->iter_idx, idx, cycle_iteration, raid_bdev); + idx++; + continue; + } + + ch = spdk_bdev_get_io_channel(desc); + + cb_arg_new = alloc_cb_arg(cycle_iteration->iter_idx, idx, cycle_iteration, raid_bdev); + ret = spdk_bdev_writev_blocks(desc, ch, + raid_bdev->rebuild->cycle_progress->base_bdevs_sg_buf[info->buf_idx], + raid_bdev->rebuild->strips_per_area, + info->pd_lba, info->pd_blocks, + info->cb, cb_arg_new); + + if (spdk_unlikely(ret != 0)) { + info->cb(NULL, false, cb_arg_new); + } + idx++; + } + + free(info); + spdk_bdev_free_io(bdev_io); +} + +static int +raid1_submit_rebuild_request(struct raid_bdev *raid_bdev, struct rebuild_progress *cycle_progress, spdk_bdev_io_completion_cb cb) +{ + struct raid_rebuild *rebuild = raid_bdev->rebuild; + struct rebuild_cycle_iteration *cycle_iter = &(cycle_progress->cycle_iteration); + struct rebuild_first_stage_cb *cb_arg = calloc(1, sizeof(struct rebuild_first_stage_cb)); + uint8_t base_idx = 0; + int ret = 0; + struct spdk_bdev_desc *desc; /*__base_desc_from_raid_bdev(raid_bdev, idx);*/ + struct spdk_io_channel *ch = NULL; /*spdk_bdev_get_io_channel(desc)*/ + struct raid_base_bdev_info *base_info; + uint64_t pd_lba, pd_blocks; + uint8_t idx = 0; + + if (cb_arg == NULL) + { + return -ENOMEM; + } + + pd_lba = get_area_offset(cycle_iter->iter_idx, rebuild->strips_per_area, raid_bdev->strip_size); + pd_blocks = get_area_size(rebuild->strips_per_area, raid_bdev->strip_size); + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) + { + desc = base_info->desc; + if (desc != NULL) + { + ch = spdk_bdev_get_io_channel(desc); + + if (ch != NULL && !SPDK_TEST_BIT(&(cycle_iter->br_area_cnt), idx)) + { + break; + } + } + idx++; + } + + if (idx == raid_bdev->num_base_bdevs) + { + 
SPDK_ERRLOG("No available devices for reading from raid\n"); + SPDK_SET_BIT(&(raid_bdev->rebuild->rebuild_flag), REBUILD_FLAG_FINISH); + free(cb_arg); return -ENODEV; /* cb_arg was never handed to a callback, so it is released here */ + } + + if (ch == NULL) + { + SPDK_SET_BIT(fl(rebuild), REBUILD_FLAG_FATAL_ERROR); + free(cb_arg); return -EIO; /* cb_arg was never handed to a callback, so it is released here */ + } + + cb_arg->cb = cb; + cb_arg->pd_lba = pd_lba; + cb_arg->pd_blocks = pd_blocks; + cb_arg->raid_bdev = raid_bdev; + cb_arg->cycle_progress = cycle_progress; + cb_arg->buf_idx = base_idx; + + ret = spdk_bdev_readv_blocks(desc, ch, + cycle_progress->base_bdevs_sg_buf[base_idx], + rebuild->strips_per_area, + pd_lba, pd_blocks, + raid1_submit_rebuild_second_stage, cb_arg); + if (ret != 0) + { + SPDK_SET_BIT(fl(rebuild), REBUILD_FLAG_FATAL_ERROR); + free(cb_arg); return -EIO; /* submission failed, so the second-stage callback will not free cb_arg */ + } + return ret; +} + static struct raid_bdev_module g_raid1_module = { .level = RAID1, .base_bdevs_min = 2, .base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1}, - .memory_domains_supported = true, + .memory_domains_supported = true, //false? .start = raid1_start, .stop = raid1_stop, .submit_rw_request = raid1_submit_rw_request, + .rebuild_request = raid1_submit_rebuild_request, }; RAID_MODULE_REGISTER(&g_raid1_module) diff --git a/module/bdev/raid/service.c b/module/bdev/raid/service.c new file mode 100644 index 00000000000..8ee6274ee44 --- /dev/null +++ b/module/bdev/raid/service.c @@ -0,0 +1,418 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2018 Intel Corporation. + * All rights reserved. 
+ */ + +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/likely.h" +#include "spdk/log.h" +#include "spdk/util.h" +#include "spdk/queue.h" + +#include "atomic_raid.h" +#include "bdev_raid.h" +#include "service.h" + +/* ======================= Poller functionality =========================== */ + +/* + * The function shold be run after rebuilding of concrete area from iteration + */ +static inline void +partly_submit_iteration(bool result, uint64_t iter_idx, uint16_t area_idx, struct raid_rebuild *rebuild) +{ + struct rebuild_cycle_iteration *iter = &(rebuild->cycle_progress->cycle_iteration); + + if (result) + { + SPDK_REMOVE_BIT(&(rebuild->rebuild_matrix[iter_idx]), area_idx); + } + + ATOMIC_INCREMENT(&(iter->pr_area_cnt)); +} + +static inline void +_free_sg_buffer_part(struct iovec *vec_array, uint64_t len) +{ + struct iovec *base_vec; + + for (base_vec = vec_array; base_vec < vec_array + len; base_vec++) + { + spdk_dma_free(base_vec->iov_base); + } +} + +static inline void +free_sg_buffer(struct iovec *vec_array, uint64_t len) +{ + /* usage: struct iovec *a; free_sg_buffer(&a, b); */ + if (len != 0) + { + _free_sg_buffer_part(vec_array, len); + } + free(vec_array); +} + +uint64_t +get_area_offset(size_t area_idx, size_t area_size, size_t strip_size) +{ + return area_idx * area_size * strip_size; +} + +uint64_t +get_area_size(size_t area_size, size_t strip_size) +{ + return area_size * strip_size; +} + +static inline struct iovec * +allocate_sg_buffer(size_t elem_size, size_t elemcnt, size_t align) +{ + struct iovec *vec_array = calloc(elemcnt, sizeof(struct iovec)); + if (vec_array == NULL) + { + return NULL; + } + + for (size_t i = 0; i < elemcnt; i++) + { + vec_array[i].iov_len = elem_size; + vec_array[i].iov_base = (void *)spdk_dma_zmalloc(sizeof(uint8_t) * vec_array[i].iov_len, align, NULL); + if (vec_array[i].iov_base == NULL) + { + _free_sg_buffer_part(vec_array, i); + free(vec_array); + return NULL; + } + } + return vec_array; +} + +void 
reset_buffer(struct iovec *vec_array, uint32_t len) +{ + struct iovec *base_vec; + if (len == 0) + return; + + for (base_vec = vec_array; base_vec < vec_array + len; base_vec++) + { + memset(base_vec->iov_base, 0, base_vec->iov_len); + } +} + +static inline void +_free_base_bdevs_buff(struct raid_bdev *raid_bdev, struct rebuild_progress *cycle_progress, uint8_t arr_num) +{ + for (uint8_t i = 0; i < arr_num; i++) + { + free_sg_buffer(cycle_progress->base_bdevs_sg_buf[i], raid_bdev->rebuild->strips_per_area); + cycle_progress->base_bdevs_sg_buf[i] = NULL; + } +} + +static inline void +free_base_bdevs_buff(struct raid_bdev *raid_bdev, struct rebuild_progress *cycle_progress) +{ + _free_base_bdevs_buff(raid_bdev, cycle_progress, raid_bdev->num_base_bdevs); +} + +static inline int +alloc_base_bdevs_buff(struct raid_bdev *raid_bdev, struct rebuild_progress *cycle_progress) +{ + uint64_t elem_size = spdk_bdev_get_block_size(&(raid_bdev->bdev)) * raid_bdev->strip_size; + uint8_t i = 0; + struct raid_base_bdev_info *base_info; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) + { + if (base_info->desc != NULL) + { + struct spdk_bdev *bbdev = spdk_bdev_desc_get_bdev(base_info->desc); + + if (spdk_bdev_get_write_unit_size(bbdev) != 1) + { + SPDK_WARNLOG("Unsupported write_unit_size in base bdev of raid"); + } + + if (bbdev->required_alignment != 0) + { + SPDK_WARNLOG("Rebuild system unsupported alignment (TODO)"); + } + } + + cycle_progress->base_bdevs_sg_buf[i] = allocate_sg_buffer(elem_size, raid_bdev->rebuild->strips_per_area, 0); + if (cycle_progress->base_bdevs_sg_buf[i] == NULL) + { + _free_base_bdevs_buff(raid_bdev, cycle_progress, i); + return -ENOMEM; + } + + i++; + } + + return 0; +} + +static inline uint16_t +count_broken_areas(ATOMIC_SNAPSHOT_TYPE area_str) +{ + uint16_t cnt = 0; + + for (uint16_t i = 0; i < LEN_AREA_STR_IN_BIT; i++) + { + if (SPDK_TEST_BIT(&area_str, i)) + cnt += 1; + } + + return cnt; +} + +static inline int64_t +init_rebuild_cycle(struct 
rebuild_progress *cycle_progress, struct raid_bdev *raid_bdev) +{ + int64_t start_idx = NOT_NEED_REBUILD; + struct raid_rebuild *rebuild = raid_bdev->rebuild; + + cycle_progress->clear_area_str_cnt = 0; + cycle_progress->area_str_cnt = 0; + + for (uint64_t i = 0; i < rebuild->num_memory_areas; i++) + { + if (ATOMIC_IS_AREA_STR_CLEAR(&rebuild->rebuild_matrix[i])) + continue; + + if (start_idx == NOT_NEED_REBUILD) + { + start_idx = i; + } + + SPDK_SET_BIT(&(cycle_progress->area_proection[b_GET_IDX_BP(i)]), b_GET_SHFT_BP(i)); + + cycle_progress->area_str_cnt += 1; + } + + if (start_idx != NOT_NEED_REBUILD) + { + raid_bdev->rebuild->cycle_progress = cycle_progress; + } + else + { + raid_bdev->rebuild->cycle_progress = NULL; + } + + return start_idx; +} + +static inline int64_t +get_iter_idx(int64_t prev_idx, struct raid_bdev *raid_bdev) +{ + struct rebuild_progress *cycle_progress = raid_bdev->rebuild->cycle_progress; + + for (int64_t i = prev_idx + 1; i < (int64_t)raid_bdev->rebuild->num_memory_areas; i++) + { + if (!SPDK_TEST_BIT(&(cycle_progress->area_proection[b_GET_IDX_BP(i)]), b_GET_SHFT_BP(i))) + continue; + return i; + } + return NOT_NEED_REBUILD; +} + +static inline void +finish_rebuild_cycle(struct raid_bdev *raid_bdev) +{ + struct raid_rebuild *rebuild = raid_bdev->rebuild; + + if (rebuild == NULL) + { + return; + } + free_base_bdevs_buff(raid_bdev, rebuild->cycle_progress); + free(rebuild->cycle_progress); + rebuild->cycle_progress = NULL; + SPDK_REMOVE_BIT(fl(rebuild), REBUILD_FLAG_IN_PROGRESS); +} + +static inline void +init_cycle_iteration(struct raid_rebuild *rebuild, int64_t curr_idx) +{ + struct rebuild_cycle_iteration *cycle_iter = &(rebuild->cycle_progress->cycle_iteration); + + cycle_iter->iter_idx = curr_idx; + cycle_iter->snapshot = CREATE_AREA_STR_SNAPSHOT(&(rebuild->rebuild_matrix[curr_idx])); + cycle_iter->br_area_cnt = count_broken_areas(cycle_iter->snapshot); + cycle_iter->pr_area_cnt = 0; + cycle_iter->iter_progress = cycle_iter->snapshot; 
+} + +void init_cb_arg(struct iteration_step *iter_info, int64_t iter_idx, int16_t area_idx, struct rebuild_cycle_iteration *iteration, struct raid_bdev *raid_bdev) +{ + iter_info->area_idx = area_idx; + iter_info->iter_idx = iter_idx; + iter_info->iteration = iteration; + iter_info->raid_bdev = raid_bdev; +} + +struct iteration_step * +alloc_cb_arg(int64_t iter_idx, int16_t area_idx, struct rebuild_cycle_iteration *iteration, struct raid_bdev *raid_bdev) +{ + struct iteration_step *iter_info = calloc(1, sizeof(struct iteration_step)); + if (iter_info == NULL) + { + return NULL; + } + init_cb_arg(iter_info, iter_idx, area_idx, iteration, raid_bdev); + return iter_info; +} + +void free_cb_arg(struct iteration_step *cb) +{ + if (cb == NULL) + { + return; + } + free(cb); +} + +void extern_continue_rebuild(int64_t iter_idx, int16_t area_idx, struct rebuild_cycle_iteration *iteration, struct raid_bdev *raid_bdev) +{ + struct rebuild_progress *cycle_progress = raid_bdev->rebuild->cycle_progress; + int64_t next_iter_idx; + int ret = 0; + + ++cycle_progress->clear_area_str_cnt; + + /* Wether it is the last iteration or not */ + if (cycle_progress->clear_area_str_cnt == cycle_progress->area_str_cnt) + { + SPDK_SET_BIT(&(raid_bdev->rebuild->rebuild_flag), REBUILD_FLAG_FINISH); + return; + } + + next_iter_idx = get_iter_idx(iter_idx, raid_bdev); + init_cycle_iteration(raid_bdev->rebuild, next_iter_idx); + + ret = raid_bdev->module->rebuild_request(raid_bdev, cycle_progress, continue_rebuild); + + if (spdk_unlikely(ret != 0)) + { + SPDK_SET_BIT(fl(raid_bdev->rebuild), REBUILD_FLAG_FATAL_ERROR); + } +} + +/* + * Callback function. 
+ */
+void continue_rebuild(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+    /*
+     * Completion of one area rebuild request: cb_arg is a
+     * struct iteration_step owned by this callback. Everything is copied
+     * out of it first because it is released right below.
+     */
+    struct iteration_step *step = cb_arg;
+    int64_t iter_idx = step->iter_idx;
+    int16_t area_idx = step->area_idx;
+    struct rebuild_cycle_iteration *iteration = step->iteration;
+    struct raid_bdev *raid_bdev = step->raid_bdev;
+
+    free_cb_arg(step);
+
+    if (bdev_io != NULL)
+    {
+        spdk_bdev_free_io(bdev_io);
+    }
+    partly_submit_iteration(success, iter_idx, area_idx, raid_bdev->rebuild);
+
+    /*
+     * Test whether it is the end of the iteration or not: the
+     * compare-and-swap resets pr_area_cnt to 0 only when it has reached
+     * br_area_cnt, i.e. when this was the last outstanding area.
+     */
+    if (!ATOMIC_EXCHANGE(&(iteration->pr_area_cnt), iteration->br_area_cnt, 0))
+    {
+        return;
+    }
+
+    extern_continue_rebuild(iter_idx, area_idx, iteration, raid_bdev);
+}
+
+/*
+ * Rebuild poller body; arg is the struct raid_bdev to service.
+ *
+ * Starts a new rebuild cycle when none is in progress and the rebuild
+ * matrix has marked area stripes, and finalizes a cycle whose FINISH flag
+ * has been raised.
+ *
+ * Returns 0 when there is nothing to do or a cycle is still running,
+ * -ENODEV when the raid has no rebuild structure or its module does not
+ * implement rebuild_request, -ENOEXEC after a fatal rebuild error,
+ * -ENOMEM when the cycle state cannot be allocated, otherwise the value
+ * returned by rebuild_request().
+ *
+ * NOTE(review): if this is registered directly as an spdk_poller callback,
+ * confirm that negative return values are intended.
+ */
+int run_rebuild_poller(void *arg)
+{
+    struct raid_bdev *raid_bdev = arg;
+    struct raid_rebuild *rebuild = raid_bdev->rebuild;
+    struct rebuild_progress *cycle_progress = NULL;
+    int64_t start_idx;
+    int ret = 0;
+
+    if (rebuild == NULL)
+    {
+        SPDK_WARNLOG("%s doesn't have rebuild struct!\n", raid_bdev->bdev.name);
+        return -ENODEV;
+    }
+    if (!SPDK_TEST_BIT(fl(rebuild), REBUILD_FLAG_INITIALIZED))
+    {
+        /*
+         * the rebuild structure has not yet been initialized
+         */
+        return 0;
+    }
+    if (SPDK_TEST_BIT(&(rebuild->rebuild_flag), REBUILD_FLAG_FATAL_ERROR))
+    {
+        SPDK_WARNLOG("%s catch fatal error during rebuild process!\n", raid_bdev->bdev.name);
+        return -ENOEXEC;
+    }
+    if (SPDK_TEST_BIT(fl(rebuild), REBUILD_FLAG_IN_PROGRESS))
+    {
+        /*
+         * Previous recovery process is not complete
+         */
+        if (SPDK_TEST_BIT(fl(rebuild), REBUILD_FLAG_FINISH))
+        {
+            finish_rebuild_cycle(raid_bdev);
+        }
+        return 0;
+    }
+
+    cycle_progress = calloc(1, sizeof(*cycle_progress));
+
+    if (cycle_progress == NULL)
+    {
+        SPDK_ERRLOG("the struct rebuild_progress wasn't allocated \n");
+        return -ENOMEM;
+    }
+
+    /*
+     * Representation of area-stripe index in the area_proection
+     * (from which the rebuild cycle will begin)
+     */
+    start_idx = init_rebuild_cycle(cycle_progress, raid_bdev);
+
+    if (start_idx == NOT_NEED_REBUILD)
+    {
+        /*
+         * no rebuild needed
+         */
+        free(cycle_progress);
+        return ret;
+    }
+
+    if (raid_bdev->module->rebuild_request == NULL)
+    {
+        SPDK_ERRLOG("rebuild_request inside raid%d doesn't implemented\n", raid_bdev->level);
+        /*
+         * init_rebuild_cycle() has already published cycle_progress in the
+         * rebuild structure; take it back, otherwise it leaks on every poll.
+         */
+        free(cycle_progress);
+        rebuild->cycle_progress = NULL;
+        return -ENODEV;
+    }
+
+    SPDK_SET_BIT(fl(rebuild), REBUILD_FLAG_IN_PROGRESS);
+    SPDK_WARNLOG("Rebuild have started...\n");
+
+    init_cycle_iteration(rebuild, start_idx);
+    if (alloc_base_bdevs_buff(raid_bdev, cycle_progress) != 0)
+    {
+        /*
+         * Undo the cycle start: without this IN_PROGRESS stays set forever
+         * and the rebuild never runs again.
+         * NOTE(review): relies on free_base_bdevs_buff() coping with a
+         * partially allocated buffer set - confirm.
+         */
+        finish_rebuild_cycle(raid_bdev);
+        return -ENOMEM;
+    }
+
+    ret = raid_bdev->module->rebuild_request(raid_bdev, cycle_progress, continue_rebuild);
+    if (ret == -ENOMEM || ret == -EIO)
+    {
+        /* The request was not started; close the cycle so a later poll can retry */
+        finish_rebuild_cycle(raid_bdev);
+    }
+    return ret;
+}
\ No newline at end of file
diff --git a/module/bdev/raid/service.h b/module/bdev/raid/service.h
new file mode 100644
index 00000000000..c402c54b428
--- /dev/null
+++ b/module/bdev/raid/service.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef SPDK_RAID_SERVICE_INTERNAL_H
+#define SPDK_RAID_SERVICE_INTERNAL_H
+
+#include "spdk/queue.h"
+#include "atomic_raid.h"
+#include "bdev_raid.h"
+
+/* Enables the PRINT_* dump helpers at the bottom of this header */
+#define SERVICE_DEBUG
+
+/* Shortcuts into raid_bdev / raid_rebuild */
+#define __base_desc_from_raid_bdev(raid_bdev, idx) ((raid_bdev)->base_bdev_info[(idx)].desc)
+/* Pointer to the rebuild flag word, as expected by SPDK_*_BIT() */
+#define fl(rebuild) (&((rebuild)->rebuild_flag))
+/* Area stripe index value meaning "nothing to rebuild" */
+#define NOT_NEED_REBUILD (-1)
+
+/* Accessors for rebuild matrix lines and atomic counters */
+#define ATOMIC_IS_AREA_STR_CLEAR(ptr) (*(ptr) == 0)
+#define CREATE_AREA_STR_SNAPSHOT(area_srt_ptr) raid_atomic64_read(area_srt_ptr)
+#define ATOMIC_INCREMENT(ptr) raid_atomic64_inc(ptr)
+#define ATOMIC_DECREMENT(ptr) raid_atomic64_dec(ptr)
+/* Despite the name this is a compare-and-exchange: see raid_atomic64_cmpxchg() */
+#define ATOMIC_EXCHANGE(dest_ptr, exc, src) (raid_atomic64_cmpxchg(dest_ptr, exc, src))
+
+/* Bit projection of the rebuild matrix: one bit per area stripe */
+#define b_BASE_TYPE uint64_t
+#define b_BIT_PROECTION(name) b_BASE_TYPE name[SPDK_CEIL_DIV(MATRIX_REBUILD_SIZE, (sizeof(b_BASE_TYPE) * 8))]
+/* Index of the projection word that holds bit x */
+#define b_GET_IDX_BP(x) ((x) / (sizeof(b_BASE_TYPE) * 8))
+/* Position of bit x inside its projection word */
+#define b_GET_SHFT_BP(x) ((x) % (sizeof(b_BASE_TYPE) * 8))
+
+/*
+ * Compare *ptr with exc and, when equal, store src; returns whether the
+ * store happened.
+ * NOTE: this is a plain load/compare/store sequence, it is NOT atomic.
+ */
+static inline bool
+_CAS(ATOMIC_TYPE *ptr, ATOMIC_SNAPSHOT_TYPE exc, ATOMIC_SNAPSHOT_TYPE src)
+{
+    if (*ptr == exc)
+    {
+        *ptr = src;
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Argument handed to the completion callback of one area rebuild request
+ * (see alloc_cb_arg() / continue_rebuild()).
+ */
+struct iteration_step
+{
+    /* index of the area inside the area stripe */
+    int16_t area_idx;
+
+    /* index of the area stripe */
+    int64_t iter_idx;
+
+    /* iteration state the request belongs to */
+    struct rebuild_cycle_iteration *iteration;
+
+    struct raid_bdev *raid_bdev;
+
+    /* not filled by init_cb_arg() */
+    spdk_bdev_io_completion_cb cb;
+};
+
+struct rebuild_cycle_iteration
+{
+    /* number of broken areas in current area stripe */
+    int16_t br_area_cnt;
+
+    /* index of the area stripe for rebuild */
+    int64_t iter_idx;
+
+    /* processed areas counter, incremented after the rebuild of a concrete area completes */
+    ATOMIC_DATA(pr_area_cnt);
+
+    /* snapshot of area stripe from rebuild matrix (non atomic) */
+    ATOMIC_SNAPSHOT(snapshot);
+
+    /*
+     * metadata for current iteration,
+     * describing which areas should still be started for rebuild
+     * (equals snapshot at initialization stage)
+     * (10..010 |-[start rebuild area with index 1]-> 10..000)
+     */
+    ATOMIC_SNAPSHOT(iter_progress);
+};
+
+struct rebuild_progress
+{
+    /*
+     * bit projection of rebuild matrix,
+     * where each bit corresponds to one line (area stripe) in rebuild matrix
+     * (if the line contains broken areas, corresponding bit equals 1, otherwise 0)
+     */
+    b_BIT_PROECTION(area_proection);
+
+    /* number of area stripes with broken areas */
+    uint64_t area_str_cnt;
+
+    /* number of area stripes with processed areas (tried to rebuild all the broken areas) */
+    uint64_t clear_area_str_cnt;
+
+    /*
+     * To avoid memory overloading, only one area stripe (in need of rebuild)
+     * can be processed at a time.
+     * The field describes the rebuild of this area stripe.
+     */
+    struct rebuild_cycle_iteration cycle_iteration;
+
+    /*
+     * Buffers for raid base_bdevs.
+     * Each element - SG-buffer (array of iovec);
+     * Size of each SG-buffer is size of one memory area in bytes;
+     * One element from SG-buffer describes buffer size equals size of one strip in bytes.
+     */
+    struct iovec *base_bdevs_sg_buf[BASE_BDEVS_MAX_NUM];
+};
+
+int
+run_rebuild_poller(void *arg);
+
+void continue_rebuild(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+
+void
+extern_continue_rebuild(int64_t iter_idx,
+                        int16_t area_idx,
+                        struct rebuild_cycle_iteration *iteration,
+                        struct raid_bdev *raid_bdev);
+
+struct iteration_step *
+alloc_cb_arg(int64_t iter_idx, int16_t area_idx, struct rebuild_cycle_iteration *iteration, struct raid_bdev *raid_bdev);
+
+void init_cb_arg(struct iteration_step *iter_info,
+                 int64_t iter_idx,
+                 int16_t area_idx,
+                 struct rebuild_cycle_iteration *iteration,
+                 struct raid_bdev *raid_bdev);
+
+void free_cb_arg(struct iteration_step *cb);
+
+void reset_buffer(struct iovec *vec_array, uint32_t len);
+
+uint64_t
+get_area_offset(size_t area_idx, size_t area_size, size_t strip_size);
+
+uint64_t
+get_area_size(size_t area_size, size_t strip_size);
+
+#ifdef SERVICE_DEBUG
+
+/* Debug dump helpers; they log through SPDK_ERRLOG */
+#define PRINT_iteration_step(it) SPDK_ERRLOG("\
+\niter_step: \
+\n area_idx=%d \
+\n iter_idx=%ld \n", (it)->area_idx, (it)->iter_idx)
+
+#define PRINT_rebuild_cycle_iteration(cit) SPDK_ERRLOG("\
+\ncycle_iter: \
+\n br_area_cnt=%d \
+\n iter_idx=%ld \
+\n pr_area_cnt=%lu \
+\n snapshot=%lu \
+\n iter_progress=%lu \n", (cit)->br_area_cnt, (cit)->iter_idx, (cit)->pr_area_cnt, (cit)->snapshot, (cit)->iter_progress)
+
+#define PRINT_rebuild_progress(pr) SPDK_ERRLOG("\
+\nprogress: \
+\n area_str_cnt=%lu \
+\n clear_area_str_cnt=%lu \n", (pr)->area_str_cnt, (pr)->clear_area_str_cnt)
+
+#endif
+
+#endif /* SPDK_RAID_SERVICE_INTERNAL_H */
diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py
index 9245d76007c..b605d96a81c 100644
--- a/python/spdk/rpc/bdev.py
+++ b/python/spdk/rpc/bdev.py
@@ -448,6 +448,20 @@ def bdev_raid_remove_base_bdev(client, name):
     params = {'name': name}
     return client.call('bdev_raid_remove_base_bdev', params)
 
+def bdev_raid_add_base_bdev(client, raid_name, base_bdev_name):
+    """Add base bdev to existing raid bdev
+
+    Args:
+        raid_name: raid bdev name
+        base_bdev_name: base bdev name
+
+    Returns:
+        None
+    """
+    params = {'raid_name': raid_name,
+              'base_bdev_name': base_bdev_name}
+    return client.call('bdev_raid_add_base_bdev', params)
+
 
 def bdev_aio_create(client, filename, name, block_size=None, readonly=False):
     """Construct a Linux AIO block device.
diff --git a/scripts/rpc.py b/scripts/rpc.py
index 969007d8ba3..2efb0917ae5 100755
--- a/scripts/rpc.py
+++ b/scripts/rpc.py
@@ -2138,6 +2138,15 @@ def bdev_raid_remove_base_bdev(args):
     p.add_argument('name', help='base bdev name')
     p.set_defaults(func=bdev_raid_remove_base_bdev)
 
+    def bdev_raid_add_base_bdev(args):
+        rpc.bdev.bdev_raid_add_base_bdev(args.client,
+                                         raid_name=args.raid_name,
+                                         base_bdev_name=args.base_bdev_name)
+    p = subparsers.add_parser('bdev_raid_add_base_bdev', help='Add a basic bdev to an existing raid bdev')
+    p.add_argument('-r', '--raid-name', help='raid bdev name', required=True)
+    p.add_argument('-b', '--base-bdev-name', help='base bdev name', required=True)
+    p.set_defaults(func=bdev_raid_add_base_bdev)
+
     # split
     def bdev_split_create(args):
         print_array(rpc.bdev.bdev_split_create(args.client,
diff --git a/stop.json b/stop.json
new file mode 100644
index 00000000000..e4e4d7ee7de
--- /dev/null
+++ b/stop.json
@@ -0,0 +1,27 @@
+{
+  "subsystems": [
+    {
+      "subsystem": "bdev",
+      "config": [
+        {
+          "method": "bdev_raid_delete",
+          "params": {
+            "name": "Raid1"
+          }
+        },
+        {
+          "params": {
+            "name": "M0"
+          },
+          "method": "bdev_malloc_delete"
+        },
+        {
+          "params": {
+            "name": "M1"
+          },
+          "method": "bdev_malloc_delete"
+        }
+      ]
+    }
+  ]
+ }
\ No newline at end of file
diff --git a/test/bdev/raid1_rebuild_tests.sh b/test/bdev/raid1_rebuild_tests.sh
new file mode 100755
index 00000000000..92f87e3f7c6
--- /dev/null
+++ b/test/bdev/raid1_rebuild_tests.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Run with sudo
+
+# $1 - .../spdk (full_path)
+# $2 - .../{ublk_drv} (full_path)
+
+# Only needed if SPDK was built without ublk support:
+# rebuild SPDK with ublk and load the ublk kernel driver from directory $1.
+# NOTE: leaves the working directory at $1.
+function setup_ublk() {
+    ./configure --with-ublk;
+    make -j6;
+    cd "$1" || return 1;
+    insmod ./ublk_drv.ko;
+}
+
+# Run one fio job against a freshly created Raid1 exposed through ublk.
+# $1 - rpc json that creates the raid, $2 - rpc json that removes it,
+# $3 - fio job file
+function fio_test_raid1() {
+
+    local rpc_json_path=./test/bdev/raid1_test_config/rpc_json
+    local fio_cfg_path=./test/bdev/raid1_test_config/fio_cfg
+
+    echo " ";
+    echo "------------> TEST: $1 & $3 :START <------------";
+
+    ./scripts/rpc.py load_config -j "$rpc_json_path/$1";
+    sleep 1;
+    ./scripts/rpc.py ublk_start_disk Raid1 1;
+    sleep 1;
+
+    if fio "$fio_cfg_path/$3";
+    then
+        echo "$1 & $3 test PASSED";
+    else
+        echo "$1 & $3 test FAILED";
+    fi
+
+    ./scripts/rpc.py ublk_stop_disk 1;
+    sleep 1;
+    ./scripts/rpc.py load_config -j "$rpc_json_path/$2";
+    sleep 1;
+
+    echo "------------> TEST: $1 & $3 :FINISH <------------";
+    echo " ";
+}
+
+# Prepare the environment, build SPDK and start spdk_tgt with a ublk target
+function start() {
+    ./scripts/setup.sh;
+    make -j6;
+    sleep 1;
+    screen -dmS spdk_tgt ./build/bin/spdk_tgt;
+    sleep 1;
+    ./scripts/rpc.py ublk_create_target;
+    sleep 1;
+}
+
+# Destroy the ublk target and stop spdk_tgt
+function finish() {
+    ./scripts/rpc.py ublk_destroy_target;
+    screen -S spdk_tgt -X kill;
+}
+
+# Resolve the SPDK directory to an absolute path: setup_ublk changes the
+# working directory, so a relative path would not lead back afterwards.
+spdk_path=$(readlink -f "${1:-.}")
+
+if [ -n "$2" ]
+then
+    cd "$spdk_path" || exit 1;
+    setup_ublk "$2";
+fi
+
+cd "$spdk_path" || exit 1
+start;
+
+fio_test_raid1 raid1.json stop.json randwrite.fio;
+fio_test_raid1 raid1.json stop.json write.fio;
+
+finish;
diff --git a/test/bdev/raid1_test_config/fio_cfg/randwrite.fio b/test/bdev/raid1_test_config/fio_cfg/randwrite.fio
new file mode 100644
index 00000000000..4d9ab3d5ad6
--- /dev/null
+++ b/test/bdev/raid1_test_config/fio_cfg/randwrite.fio
@@ -0,0 +1,12 @@
+[global]
+thread=1
+group_reporting=1
+verify=sha256
+size=1M
+iodepth=8
+rw=randwrite
+bs=4k
+
+[test]
+filename=/dev/ublkb1
+numjobs=1
diff --git a/test/bdev/raid1_test_config/fio_cfg/write.fio b/test/bdev/raid1_test_config/fio_cfg/write.fio
new file mode 100644
index 00000000000..010046a29e4
--- /dev/null
+++ b/test/bdev/raid1_test_config/fio_cfg/write.fio
@@ -0,0 +1,12 @@
+[global]
+thread=1
+group_reporting=1
+verify=sha256
+size=64M
+iodepth=1
+rw=write
+bs=4k
+
+[test]
+filename=/dev/ublkb1
+numjobs=1
diff --git a/test/bdev/raid1_test_config/rpc_json/raid1.json b/test/bdev/raid1_test_config/rpc_json/raid1.json
new file mode 100644
index 00000000000..ddde01c4667
--- /dev/null
+++ b/test/bdev/raid1_test_config/rpc_json/raid1.json
@@ -0,0 +1,37 @@
+{
+  "subsystems": [
+    {
+      "subsystem":
"bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "M0" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "M1" + }, + "method": "bdev_malloc_create" + }, + { + "method": "bdev_raid_create", + "params": { + "name": "Raid1", + "raid_level": "1", + "strip_size_kb": 8, + "base_bdevs": [ + "M0", + "M1" + ] + } + } + ] + } + ] +} diff --git a/test/bdev/raid1_test_config/rpc_json/stop.json b/test/bdev/raid1_test_config/rpc_json/stop.json new file mode 100644 index 00000000000..e4e4d7ee7de --- /dev/null +++ b/test/bdev/raid1_test_config/rpc_json/stop.json @@ -0,0 +1,27 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_delete", + "params": { + "name": "Raid1" + } + }, + { + "params": { + "name": "M0" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "M1" + }, + "method": "bdev_malloc_delete" + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/verif.fio b/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/verif.fio new file mode 100644 index 00000000000..b4b4c28d8ba --- /dev/null +++ b/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/verif.fio @@ -0,0 +1,11 @@ +[global] +thread=1 +group_reporting=1 +verify=sha256 +verify_only +size=4096B +iodepth=1 + +[test] +filename=/dev/ublkb20 +numjobs=1 diff --git a/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/write.fio b/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/write.fio new file mode 100644 index 00000000000..2b23d9338f2 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/fio_cfg/two_malloc/write.fio @@ -0,0 +1,12 @@ +[global] +thread=1 +group_reporting=1 +verify=sha256 +size=4096B +iodepth=1 +rw=write +bs=4k + +[test] +filename=/dev/ublkb20 +numjobs=1 diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/addM2.json 
b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/addM2.json new file mode 100644 index 00000000000..77b4378af53 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/addM2.json @@ -0,0 +1,16 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_add_base_bdev", + "params": { + "raid_name": "Raid1", + "base_bdev_name": "M2" + } + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/crt.json b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/crt.json new file mode 100644 index 00000000000..875da446020 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/crt.json @@ -0,0 +1,45 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "M2" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "M0" + }, + "method": "bdev_malloc_create" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 16, + "name": "M1" + }, + "method": "bdev_malloc_create" + }, + { + "method": "bdev_raid_create", + "params": { + "name": "Raid1", + "raid_level": "1", + "strip_size_kb": 8, + "base_bdevs": [ + "M0", + "M1" + ] + } + } + ] + } + ] +} diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/dtr.json b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/dtr.json new file mode 100644 index 00000000000..c875a0d62fc --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/dtr.json @@ -0,0 +1,33 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_delete", + "params": { + "name": "Raid1" + } + }, + { + "params": { + "name": "M0" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "M1" + }, + "method": "bdev_malloc_delete" + }, + { + "params": { + "name": "M2" + }, + "method": "bdev_malloc_delete" + } + ] + } + 
] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM0.json b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM0.json new file mode 100644 index 00000000000..18658f38298 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM0.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_remove_base_bdev", + "params": { + "name": "M0" + } + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM1.json b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM1.json new file mode 100644 index 00000000000..1f990af83c1 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM1.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_remove_base_bdev", + "params": { + "name": "M1" + } + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM2.json b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM2.json new file mode 100644 index 00000000000..b08b415dcd1 --- /dev/null +++ b/test/bdev/raid_service_config/raid1/rpc_cfg/two_malloc/rfM2.json @@ -0,0 +1,15 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_raid_remove_base_bdev", + "params": { + "name": "M2" + } + } + ] + } + ] + } \ No newline at end of file diff --git a/test/bdev/raid_service_config/raid1/run/two_malloc.sh b/test/bdev/raid_service_config/raid1/run/two_malloc.sh new file mode 100755 index 00000000000..97c06b50aef --- /dev/null +++ b/test/bdev/raid_service_config/raid1/run/two_malloc.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Run with sudo + +test_name="two_malloc" + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../../../..) 
+rpc_cfgdir=$(readlink -f $testdir/../rpc_cfg/$test_name)
+fio_cfgdir=$(readlink -f $testdir/../fio_cfg/$test_name)
+
+# Test scenario
+# crt=$rpc_cfgdir/crt.json
+
+# Exit status of the test; stays 1 (failed) until the verify job has run
+ret=1
+
+# Create M0, M1, M2 and Raid1 (M0 + M1) and expose the raid as /dev/ublkb20
+function start {
+    $rootdir/scripts/rpc.py load_config -j $rpc_cfgdir/crt.json;
+    sleep 1;
+
+    $rootdir/scripts/rpc.py ublk_start_disk Raid1 20;
+    sleep 1;
+}
+
+# Remove M0, write data, add M2 (rebuild target), remove M1 and verify that
+# the written data can be read back. Sets ret to the status of the verify job.
+function action {
+    $rootdir/scripts/rpc.py load_config -j $rpc_cfgdir/rfM0.json;
+    sleep 1;
+
+    $rootdir/scripts/rpc.py bdev_raid_get_bdevs all;
+
+    fio $fio_cfgdir/write.fio;
+    sleep 1;
+
+    $rootdir/scripts/rpc.py load_config -j $rpc_cfgdir/addM2.json;
+    sleep 6;
+
+    $rootdir/scripts/rpc.py load_config -j $rpc_cfgdir/rfM1.json;
+    sleep 1;
+
+    $rootdir/scripts/rpc.py bdev_raid_get_bdevs all;
+
+    fio $fio_cfgdir/verif.fio;
+    # Must be read right after fio: any command in between (even sleep)
+    # overwrites $? and the test would always report success
+    ret=$?
+    sleep 1;
+
+}
+
+# Stop the ublk disk and delete the raid together with its malloc bdevs
+function finish {
+    $rootdir/scripts/rpc.py ublk_stop_disk 20;
+    sleep 1;
+
+    $rootdir/scripts/rpc.py load_config -j $rpc_cfgdir/dtr.json
+    sleep 1;
+}
+
+# "start"     - set up and run the scenario, exit with the verify status
+# other arg   - tear down only
+# no argument - set up, run, tear down, exit with the verify status
+if [ -n "$1" ]
+then
+    if [ "$1" = "start" ]
+    then
+        start;
+        action;
+        exit $(($ret));
+    else
+        finish;
+    fi
+else
+    start;
+    action;
+    finish;
+    exit $(($ret));
+fi
diff --git a/test/bdev/raid_service_raid1.sh b/test/bdev/raid_service_raid1.sh
new file mode 100755
index 00000000000..23dd9a8affe
--- /dev/null
+++ b/test/bdev/raid_service_raid1.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Run with sudo
+
+# Directory paths
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../..)
+cfgdir=$(readlink -f $testdir/raid_service_config/raid1)
+rundir=$(readlink -f $cfgdir/run)
+
+# Create the ublk target inside an already running spdk_tgt
+start_ublk_tgt() {
+    $rootdir/scripts/rpc.py ublk_create_target
+    sleep 1
+    echo "Ublk target has been created"
+}
+
+# Destroy the ublk target
+stop_ublk_tgt() {
+    $rootdir/scripts/rpc.py ublk_destroy_target
+    sleep 2
+    echo "stop ublk target"
+}
+
+# Launch spdk_tgt in a detached screen session, then create the ublk target
+start_tgt() {
+    screen -dmS spdk_tgt $rootdir/build/bin/spdk_tgt
+    sleep 1
+    echo "spdk_tgt has been started"
+    start_ublk_tgt
+}
+
+# Destroy the ublk target, then kill the spdk_tgt screen session
+finish_tgt() {
+    stop_ublk_tgt
+    screen -S spdk_tgt -X kill
+    echo "spdk_tgt has been finished"
+}
+
+# Run every executable from the run directory: "<test> start" performs the
+# scenario and its exit status decides the verdict, "<test> finish" cleans up
+run_tests() {
+    for file in $rundir/*
+    do
+        [ -x "$file" ] || continue
+
+        local ret=""
+
+        echo -e "\e[34mTEST(start)\e[0m: \e[36m$file\e[0m"
+        if "$file" start
+        then
+            ret="\e[32mSUCCESS\e[0m"
+        else
+            ret="\e[31mFAILED\e[0m"
+        fi
+
+        "$file" finish
+        echo -e "\e[34mTEST(finish)\e[0m: $ret"
+        echo ""
+    done
+}
+
+$rootdir/scripts/setup.sh
+echo "Start build process"
+cd $rootdir
+make -j12
+cd $testdir
+sleep 1
+
+
+# no argument - spdk_tgt is already running, only manage the ublk target
+# "tgt"       - start and stop spdk_tgt as well
+# other value - build only, no tests are run
+case "$1" in
+    "")
+        start_ublk_tgt
+        run_tests
+        stop_ublk_tgt
+        ;;
+    tgt)
+        start_tgt
+        run_tests
+        finish_tgt
+        ;;
+esac
\ No newline at end of file
diff --git a/test/unit/lib/bdev/raid/Makefile b/test/unit/lib/bdev/raid/Makefile
index 1550ceb2ded..1ba53e899be 100644
--- a/test/unit/lib/bdev/raid/Makefile
+++ b/test/unit/lib/bdev/raid/Makefile
@@ -6,7 +6,8 @@
 SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..)
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-DIRS-y = bdev_raid.c concat.c raid1.c
+DIRS-y = concat.c
+# raid1.c bdev_raid.c
 
 DIRS-$(CONFIG_RAID5F) += raid5f.c