diff --git a/include/spdk/util.h b/include/spdk/util.h
index 34183ffe8ad..96f5ce1025a 100644
--- a/include/spdk/util.h
+++ b/include/spdk/util.h
@@ -42,6 +42,18 @@ extern "C" {
 /* Ceiling division of unsigned integers */
 #define SPDK_CEIL_DIV(x,y) (((x)+(y)-1)/(y))
 
+/* Non-zero if bit shift_size is set in the integer that number_ptr points to */
+#define SPDK_TEST_BIT(number_ptr, shift_size) (*(number_ptr) & (1ULL << (shift_size)))
+
+/* Set bit shift_size in the integer that number_ptr points to */
+#define SPDK_SET_BIT(number_ptr, shift_size) (*(number_ptr) |= (1ULL << (shift_size)))
+
+/* Clear bit shift_size in the integer that number_ptr points to */
+#define SPDK_REMOVE_BIT(number_ptr, shift_size) (*(number_ptr) &= ~(1ULL << (shift_size)))
+
+/* Convert a size in KiB to bytes */
+#define SPDK_KB_TO_B(number) ((number) << 10)
+
 /**
  * Macro to align a value to a given power-of-two. The resultant value
  * will be of the same type as the first parameter, and will be no
diff --git a/local-test-0-verify.state b/local-test-0-verify.state
new file mode 100644
index 00000000000..b9e1fa0c0f4
Binary files /dev/null and b/local-test-0-verify.state differ
diff --git a/module/bdev/raid/atomic_raid.h b/module/bdev/raid/atomic_raid.h
new file mode 100644
index 00000000000..f4e2378f2d9
--- /dev/null
+++ b/module/bdev/raid/atomic_raid.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef SPDK_ATOMIC_RAID_INTERNAL_H
+#define SPDK_ATOMIC_RAID_INTERNAL_H
+
+#include "spdk/util.h"
+
+/* A plain-int raid_atomic type can be added later; so far only the 64-bit one is needed. */
+
+/* 64-bit value meant to be accessed only through the raid_atomic64_* helpers below */
+typedef uint64_t raid_atomic64;
+
+/*
+ * Thin wrappers around the GCC __sync builtins. atomic_read/atomic_set access the
+ * object through a pointer-to-volatile, so the compiler has to perform the access.
+ */
+#define atomic_read(ptr) (*(volatile __typeof__(*(ptr)) *)(ptr))
+#define atomic_set(ptr, i) ((*(volatile __typeof__(*(ptr)) *)(ptr)) = (i))
+#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
+#define atomic_dec(ptr) ((void) __sync_fetch_and_sub(ptr, 1))
+#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n))
+#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
+
+#define atomic_cmpxchg_bool __sync_bool_compare_and_swap
+#define atomic_cmpxchg_val __sync_val_compare_and_swap
+
+/* Return the current value of *a. */
+static inline uint64_t
+raid_atomic64_read(const raid_atomic64 *a)
+{
+	return atomic_read(a);
+}
+
+/* Store i into *a. */
+static inline void
+raid_atomic64_set(raid_atomic64 *a, uint64_t i)
+{
+	atomic_set(a, i);
+}
+
+/* Atomically add i to *a. */
+static inline void
+raid_atomic64_add(uint64_t i, raid_atomic64 *a)
+{
+	atomic_add(a, i);
+}
+
+/* Atomically subtract i from *a. */
+static inline void
+raid_atomic64_sub(uint64_t i, raid_atomic64 *a)
+{
+	atomic_sub(a, i);
+}
+
+/* Atomically increment *a by one. */
+static inline void
+raid_atomic64_inc(raid_atomic64 *a)
+{
+	atomic_inc(a);
+}
+
+/* Atomically decrement *a by one. */
+static inline void
+raid_atomic64_dec(raid_atomic64 *a)
+{
+	atomic_dec(a);
+}
+
+/* Atomically add i to *a and return the new value. */
+static inline uint64_t
+raid_atomic64_add_return(uint64_t i, raid_atomic64 *a)
+{
+	return __sync_add_and_fetch(a, i);
+}
+
+/* Atomically subtract i from *a and return the new value. */
+static inline uint64_t
+raid_atomic64_sub_return(uint64_t i, raid_atomic64 *a)
+{
+	return __sync_sub_and_fetch(a, i);
+}
+
+/* Atomically increment *a by one and return the new value. */
+static inline uint64_t
+raid_atomic64_inc_return(raid_atomic64 *a)
+{
+	return raid_atomic64_add_return(1, a);
+}
+
+/* Atomically decrement *a by one and return the new value. */
+static inline uint64_t
+raid_atomic64_dec_return(raid_atomic64 *a)
+{
+	return raid_atomic64_sub_return(1, a);
+}
+
+/* If *a equals old_val, atomically store new_val; return true if the store happened. */
+static inline bool
+raid_atomic64_cmpxchg_bool(raid_atomic64 *a, uint64_t old_val, uint64_t new_val)
+{
+	return atomic_cmpxchg_bool(a, old_val, new_val);
+}
+
+/* If *a equals old_val, atomically store new_val; return the value *a held before the call. */
+static inline uint64_t
+raid_atomic64_cmpxchg_val(raid_atomic64 *a, uint64_t old_val, uint64_t new_val)
+{
+	return atomic_cmpxchg_val(a, old_val, new_val);
+}
+
+/* Atomically set bit shift_size of *atomic_ptr; shift_size must be below 64. */
+static inline void
+raid_atomic64_set_bit(raid_atomic64 *atomic_ptr, uint64_t shift_size)
+{
+	(void) __sync_fetch_and_or(atomic_ptr, (uint64_t)1 << shift_size);
+}
+
+/* Atomically clear bit shift_size of *atomic_ptr; shift_size must be below 64. */
+static inline void
+raid_atomic64_remove_bit(raid_atomic64 *atomic_ptr, uint64_t shift_size)
+{
+	(void) __sync_fetch_and_and(atomic_ptr, ~((uint64_t)1 << shift_size));
+}
+
+#endif /* SPDK_ATOMIC_RAID_INTERNAL_H */
diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c
index d1c8ea8e240..847ce61411e 100644
--- a/module/bdev/raid/bdev_raid.c
+++ b/module/bdev/raid/bdev_raid.c
@@ -951,12 +951,7 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
 		return -EEXIST;
 	}
 
-	if (level == RAID1) {
-		if (strip_size != 0) {
-			SPDK_ERRLOG("Strip size is not supported by raid1\n");
-			return -EINVAL;
-		}
-	} else if (spdk_u32_is_pow2(strip_size) == false) {
+	if (spdk_u32_is_pow2(strip_size) == false) {
 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
 		return -EINVAL;
 	}
@@ -1009,6 +1004,14 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
 		return -ENOMEM;
 	}
 
+	/* Allocate the rebuild bookkeeping; calloc zeroes it, so no area starts out marked */
+	raid_bdev->rebuild = calloc(1, sizeof(*raid_bdev->rebuild));
+	if (!raid_bdev->rebuild) {
+		SPDK_ERRLOG("Unable to allocate memory for raid rebuild struct\n");
+		free(raid_bdev);
+		return -ENOMEM;
+	}
+
 	raid_bdev->module = module;
 	raid_bdev->num_base_bdevs = num_base_bdevs;
 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h
index c6e31ea2ca2..3c6e97d317c 100644
--- a/module/bdev/raid/bdev_raid.h
+++ b/module/bdev/raid/bdev_raid.h
@@ -6,8 +6,19 @@
 #ifndef SPDK_BDEV_RAID_INTERNAL_H
 #define SPDK_BDEV_RAID_INTERNAL_H
 
+#define MATRIX_REBUILD_SIZE 32768 /* 2^15 */
+
 #include "spdk/bdev_module.h"
 #include "spdk/uuid.h"
+#include "atomic_raid.h"
+
+enum rebuild_flag {
+	/* Initial state: no area has been marked broken in rebuild_matrix */
+	REBUILD_FLAG_INIT_CONFIGURATION = 0UL,
+
+	/* At least one area has been marked broken in rebuild_matrix */
+	REBUILD_FLAG_NEED_REBUILD = 1UL,
+};
 
 enum raid_level {
 	INVALID_RAID_LEVEL = -1,
@@ -43,6 +54,23 @@ enum raid_bdev_state {
 
 typedef void (*raid_bdev_remove_base_bdev_cb)(void *ctx, int status);
 
+/*
+ * raid_rebuild assists in the raid bdev rebuild process.
+ */
+struct raid_rebuild {
+	/* One word per area; bit N set means base bdev N is marked broken in that area */
+	raid_atomic64 rebuild_matrix[MATRIX_REBUILD_SIZE];
+
+	/* Number of areas tracked in rebuild_matrix */
+	uint64_t num_memory_areas;
+
+	/* Number of strips covered by one area */
+	uint64_t strips_per_area;
+
+	/* Current state, one of enum rebuild_flag */
+	raid_atomic64 rebuild_flag;
+};
+
 /*
  * raid_base_bdev_info contains information for the base bdevs which are part of some
  * raid. This structure contains the per base bdev information. Whatever is
@@ -143,6 +171,9 @@ struct raid_bdev {
 	/* Raid Level of this raid bdev */
 	enum raid_level level;
 
+	/* Rebuild bookkeeping, allocated in raid_bdev_create() */
+	struct raid_rebuild *rebuild;
+
 	/* Set to true if destroy of this raid bdev is started. */
 	bool destroy_started;
 
diff --git a/module/bdev/raid/raid1.c b/module/bdev/raid/raid1.c
index 74506503060..bd17b7bb119 100644
--- a/module/bdev/raid/raid1.c
+++ b/module/bdev/raid/raid1.c
@@ -2,24 +2,161 @@
  * Copyright (C) 2022 Intel Corporation.
  * All rights reserved.
  */
 
 #include "bdev_raid.h"
 
+#include "spdk/stdinc.h"
 #include "spdk/likely.h"
 #include "spdk/log.h"
+#include "spdk/util.h"
 
 struct raid1_info {
 	/* The parent raid bdev */
 	struct raid_bdev *raid_bdev;
 };
 
+/* Number of base bdevs that one rebuild_matrix word can track (one bit each) */
+#define REBUILD_BITS_PER_AREA (sizeof(raid_atomic64) * 8)
+
+/* Strip size in blocks for rebuild accounting; never 0, so it is always a valid divisor */
+static uint64_t
+rebuild_strip_size(const struct raid_bdev *raid_bdev)
+{
+	return raid_bdev->strip_size != 0 ? raid_bdev->strip_size : 1;
+}
+
+/*
+ * Find the slot of the base bdev whose name matches the bdev of bdev_io.
+ * Stores the slot in *bdev_idx and returns 0, or returns -ENODEV if no base bdev matches.
+ */
+static int
+get_current_bdev_idx(struct spdk_bdev_io *bdev_io, struct raid_bdev_io *raid_io, uint32_t *bdev_idx)
+{
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+
+	for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
+		const char *name = raid_bdev->base_bdev_info[i].name;
+
+		/* Compare the string contents, not the pointer values */
+		if (name != NULL && strcmp(name, bdev_io->bdev->name) == 0) {
+			*bdev_idx = i;
+			return 0;
+		}
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Compute the range of rebuild areas touched by bdev_io.
+ * *offset receives the first area index and *num the number of areas.
+ */
+static void
+get_io_area_range(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev, uint64_t *offset,
+		  uint64_t *num)
+{
+	/* blocks */
+	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
+	uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
+
+	/* blocks -> strips */
+	uint64_t strip_size = rebuild_strip_size(raid_bdev);
+	uint64_t offset_strips = offset_blocks / strip_size;
+	uint64_t end_strips = SPDK_CEIL_DIV(offset_blocks + num_blocks, strip_size);
+
+	/* strips -> areas; init_rebuild() keeps strips_per_area non-zero */
+	uint64_t strips_per_area = raid_bdev->rebuild->strips_per_area;
+	uint64_t offset_areas = offset_strips / strips_per_area;
+	uint64_t end_areas = SPDK_CEIL_DIV(end_strips, strips_per_area);
+
+	/* Never report areas beyond the end of rebuild_matrix */
+	if (end_areas > raid_bdev->rebuild->num_memory_areas) {
+		end_areas = raid_bdev->rebuild->num_memory_areas;
+	}
+	if (offset_areas > end_areas) {
+		offset_areas = end_areas;
+	}
+
+	*offset = offset_areas;
+	*num = end_areas - offset_areas;
+}
+
+/*
+ * Record in rebuild_matrix that base bdev bdev_idx is missing the data of bdev_io:
+ * bit bdev_idx is set in every area the request touches.
+ */
+static void
+write_in_rbm_broken_block(struct spdk_bdev_io *bdev_io, struct raid_bdev_io *raid_io,
+			  uint32_t bdev_idx)
+{
+	struct raid_rebuild *rebuild = raid_io->raid_bdev->rebuild;
+	uint64_t offset_areas = 0;
+	uint64_t num_areas = 0;
+
+	/* A shift by the word width or more would be undefined behavior */
+	if (bdev_idx >= REBUILD_BITS_PER_AREA) {
+		SPDK_ERRLOG("Base bdev index %" PRIu32 " does not fit in the rebuild matrix\n", bdev_idx);
+		return;
+	}
+
+	raid_atomic64_set(&rebuild->rebuild_flag, REBUILD_FLAG_NEED_REBUILD);
+
+	get_io_area_range(bdev_io, raid_io->raid_bdev, &offset_areas, &num_areas);
+	for (uint64_t i = offset_areas; i < offset_areas + num_areas; i++) {
+		raid_atomic64_set_bit(&rebuild->rebuild_matrix[i], bdev_idx);
+	}
+}
+
+/*
+ * Check whether base bdev bdev_idx can serve bdev_io, i.e. none of the areas
+ * touched by the request is marked broken for it in rebuild_matrix.
+ */
+static bool
+io_range_is_intact(struct spdk_bdev_io *bdev_io, struct raid_bdev *raid_bdev, uint32_t bdev_idx)
+{
+	struct raid_rebuild *rebuild = raid_bdev->rebuild;
+	uint64_t offset_areas = 0;
+	uint64_t num_areas = 0;
+
+	/* Fast path: nothing was ever marked broken */
+	if (raid_atomic64_read(&rebuild->rebuild_flag) == REBUILD_FLAG_INIT_CONFIGURATION) {
+		return true;
+	}
+
+	/* write_in_rbm_broken_block() never records indexes outside the word */
+	if (bdev_idx >= REBUILD_BITS_PER_AREA) {
+		return true;
+	}
+
+	get_io_area_range(bdev_io, raid_bdev, &offset_areas, &num_areas);
+	for (uint64_t i = offset_areas; i < offset_areas + num_areas; i++) {
+		uint64_t area = raid_atomic64_read(&rebuild->rebuild_matrix[i]);
+
+		if (SPDK_TEST_BIT(&area, bdev_idx)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
 static void
 raid1_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 {
 	struct raid_bdev_io *raid_io = cb_arg;
+	uint32_t bdev_idx = 0;
+
+	/* Record the failure before bdev_io is freed; both helpers read from it */
+	if (!success) {
+		if (get_current_bdev_idx(bdev_io, raid_io, &bdev_idx) == 0) {
+			write_in_rbm_broken_block(bdev_io, raid_io, bdev_idx);
+		} else {
+			SPDK_ERRLOG("Failed I/O does not belong to any base bdev of the raid\n");
+		}
+	}
 
 	spdk_bdev_free_io(bdev_io);
 
 	raid_bdev_io_complete_part(raid_io, 1, success ?
 				   SPDK_BDEV_IO_STATUS_SUCCESS :
 				   SPDK_BDEV_IO_STATUS_FAILED);
@@ -60,7 +197,11 @@ raid1_submit_read_request(struct raid_bdev_io *raid_io)
 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
 		base_ch = raid_io->raid_ch->base_channel[idx];
 		if (base_ch != NULL) {
-			break;
+			/* Use this base bdev unless it is marked broken in the requested range */
+			if (io_range_is_intact(spdk_bdev_io_from_ctx(raid_io), raid_bdev, idx)) {
+				break;
+			}
+			base_ch = NULL;
 		}
 		idx++;
 	}
@@ -118,8 +259,12 @@ raid1_submit_write_request(struct raid_bdev_io *raid_io)
 		base_ch = raid_io->raid_ch->base_channel[idx];
 
 		if (base_ch == NULL) {
 			/* skip a missing base bdev's slot */
 			raid_io->base_bdev_io_submitted++;
+
+			/* remember that this base bdev did not receive the write */
+			write_in_rbm_broken_block(bdev_io, raid_io, idx);
+
 			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
 			continue;
 		}
@@ -175,6 +320,28 @@ raid1_submit_rw_request(struct raid_bdev_io *raid_io)
 	}
 }
 
+/* Initialize the rebuild bookkeeping from bdev.blockcnt and strip_size of raid_bdev */
+static void
+init_rebuild(struct raid_bdev *raid_bdev)
+{
+	struct raid_rebuild *rebuild = raid_bdev->rebuild;
+	uint64_t stripcnt = SPDK_CEIL_DIV(raid_bdev->bdev.blockcnt, rebuild_strip_size(raid_bdev));
+	uint64_t strips_per_area = SPDK_CEIL_DIV(stripcnt, MATRIX_REBUILD_SIZE);
+
+	rebuild->num_memory_areas = MATRIX_REBUILD_SIZE;
+	/* Keep the divisor used by get_io_area_range() non-zero even for an empty bdev */
+	rebuild->strips_per_area = strips_per_area != 0 ? strips_per_area : 1;
+	raid_atomic64_set(&rebuild->rebuild_flag, REBUILD_FLAG_INIT_CONFIGURATION);
+}
+
+/* Release the rebuild bookkeeping of raid_bdev; safe to call when it is already NULL */
+static void
+destruct_rebuild(struct raid_bdev *raid_bdev)
+{
+	free(raid_bdev->rebuild);
+	raid_bdev->rebuild = NULL;
+}
+
 static int
 raid1_start(struct raid_bdev *raid_bdev)
 {
@@ -196,6 +363,8 @@ raid1_start(struct raid_bdev *raid_bdev)
 	raid_bdev->bdev.blockcnt = min_blockcnt;
 	raid_bdev->module_private = r1info;
 
+	init_rebuild(raid_bdev);
+
 	return 0;
 }
 
@@ -206,6 +375,8 @@ raid1_stop(struct raid_bdev *raid_bdev)
 
 	free(r1info);
 
+	destruct_rebuild(raid_bdev);
+
 	return true;
 }
 
diff --git a/test/bdev/raid1_rebuild_tests.sh b/test/bdev/raid1_rebuild_tests.sh
new file mode 100755
index 00000000000..92f87e3f7c6
--- /dev/null
+++ b/test/bdev/raid1_rebuild_tests.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# Run with sudo
+
+# $1 - .../spdk (full_path)
+# $2 - .../{ublk_drv} (full_path)
+
+# Build SPDK with ublk support and load the ublk_drv.ko module found in directory $1
+function setup_ublk() {
+	./configure --with-ublk
+	make -j6
+	insmod "$1/ublk_drv.ko"
+}
+
+# Run one fio job against the raid exposed as ublk disk 1.
+# $1 - RPC config that creates the raid
+# $2 - RPC config that tears it down
+# $3 - fio job file
+# Returns the exit status of fio.
+function fio_test_raid1() {
+	local rpc_json_path=./test/bdev/raid1_test_config/rpc_json
+	local fio_cfg_path=./test/bdev/raid1_test_config/fio_cfg
+	local rc
+
+	echo " "
+	echo "------------> TEST: $1 & $3 :START <------------"
+
+	./scripts/rpc.py load_config -j "$rpc_json_path/$1"
+	sleep 1
+	./scripts/rpc.py ublk_start_disk Raid1 1
+	sleep 1
+	fio "$fio_cfg_path/$3"
+	rc=$?
+
+	if [ $rc -eq 0 ]; then
+		echo "$1 & $3 test PASSED"
+	else
+		echo "$1 & $3 test FAILED"
+	fi
+
+	./scripts/rpc.py ublk_stop_disk 1
+	sleep 1
+	./scripts/rpc.py load_config -j "$rpc_json_path/$2"
+	sleep 1
+
+	echo "------------> TEST: $1 & $3 :FINISH <------------"
+	echo " "
+
+	return $rc
+}
+
+# Run the SPDK setup script, build SPDK and start spdk_tgt with a ublk target
+function start() {
+	./scripts/setup.sh
+	make -j6
+	sleep 1
+	screen -dmS spdk_tgt ./build/bin/spdk_tgt
+	sleep 1
+	./scripts/rpc.py ublk_create_target
+	sleep 1
+}
+
+# Destroy the ublk target and stop spdk_tgt
+function finish() {
+	./scripts/rpc.py ublk_destroy_target
+	screen -S spdk_tgt -X kill
+}
+
+if [ -z "$1" ]; then
+	spdk_path=.
+else
+	spdk_path=$1
+fi
+
+# Every relative path used below is resolved from the SPDK root
+cd "$spdk_path" || exit 1
+
+if [ -n "$2" ]; then
+	setup_ublk "$2"
+fi
+
+start
+
+status=0
+fio_test_raid1 raid1.json stop.json randwrite.fio || status=1
+fio_test_raid1 raid1.json stop.json write.fio || status=1
+
+finish
+
+exit $status
diff --git a/test/bdev/raid1_test_config/fio_cfg/randwrite.fio b/test/bdev/raid1_test_config/fio_cfg/randwrite.fio
new file mode 100644
index 00000000000..4d9ab3d5ad6
--- /dev/null
+++ b/test/bdev/raid1_test_config/fio_cfg/randwrite.fio
@@ -0,0 +1,12 @@
+[global]
+thread=1
+group_reporting=1
+verify=sha256
+size=1M
+iodepth=8
+rw=randwrite
+bs=4k
+
+[test]
+filename=/dev/ublkb1
+numjobs=1
diff --git a/test/bdev/raid1_test_config/fio_cfg/write.fio b/test/bdev/raid1_test_config/fio_cfg/write.fio
new file mode 100644
index 00000000000..010046a29e4
--- /dev/null
+++ b/test/bdev/raid1_test_config/fio_cfg/write.fio
@@ -0,0 +1,12 @@
+[global]
+thread=1
+group_reporting=1
+verify=sha256
+size=64M
+iodepth=1
+rw=write
+bs=4k
+
+[test]
+filename=/dev/ublkb1
+numjobs=1
diff --git a/test/bdev/raid1_test_config/rpc_json/raid1.json b/test/bdev/raid1_test_config/rpc_json/raid1.json
new file mode 100644
index 00000000000..ddde01c4667
--- /dev/null
+++ b/test/bdev/raid1_test_config/rpc_json/raid1.json
@@ -0,0 +1,37 @@
+{
+  "subsystems": [
+    {
+      "subsystem": "bdev",
+      "config": [
+        {
+          "params": {
+            "block_size": 4096,
+            "num_blocks": 16,
+            "name": "M0"
+          },
+          "method": "bdev_malloc_create"
+        },
+        {
+          "params": {
+            "block_size": 4096,
+            "num_blocks": 16,
+            "name": "M1"
+          },
+          "method": "bdev_malloc_create"
+        },
+        {
+          "method": "bdev_raid_create",
+          "params": {
+            "name": "Raid1",
+            "raid_level": "1",
+            "strip_size_kb": 8,
+            "base_bdevs": [
+              "M0",
+              "M1"
+            ]
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/test/bdev/raid1_test_config/rpc_json/stop.json b/test/bdev/raid1_test_config/rpc_json/stop.json
new file mode 100644
index 00000000000..e4e4d7ee7de
--- /dev/null
+++ b/test/bdev/raid1_test_config/rpc_json/stop.json
@@ -0,0 +1,27 @@
+{
+  "subsystems": [
+    {
+      "subsystem": "bdev",
+      "config": [
+        {
+          "method": "bdev_raid_delete",
+          "params": {
+            "name": "Raid1"
+          }
+        },
+        {
+          "params": {
+            "name": "M0"
+          },
+          "method": "bdev_malloc_delete"
+        },
+        {
+          "params": {
+            "name": "M1"
+          },
+          "method": "bdev_malloc_delete"
+        }
+      ]
+    }
+  ]
+}