From 4fb0bfb804f0cba18910c28fba13aff348a42899 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Thu, 15 Sep 2022 12:36:11 +0000 Subject: [PATCH 001/101] log structure --- src/m23-ftl/zns_device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 1d0eb50..de53091 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -58,6 +58,17 @@ struct zdev_init_params{ bool force_reset; }; +struct log { + uint64_t logical_address; + uint64_t physical_address; + struct log *prev, *next; +}; + +struct log_pointer { + struct log *log_head; + struct log *log_end; +}; + int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); From 455f6bde97ae1ac7e18d626fbb8fe38a8e2d009d Mon Sep 17 00:00:00 2001 From: yssamtu Date: Thu, 15 Sep 2022 12:42:52 +0000 Subject: [PATCH 002/101] zns info structure --- src/m23-ftl/zns_device.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index de53091..e56469a 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -61,10 +61,13 @@ struct zdev_init_params{ struct log { uint64_t logical_address; uint64_t physical_address; - struct log *prev, *next; + struct log *prev; + struct log *next; }; -struct log_pointer { +struct zns_info { + int fd; + int nsid; struct log *log_head; struct log *log_end; }; From 54989615a19774ae045e43903143822ad55d3f89 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Thu, 15 Sep 2022 21:53:21 +0000 Subject: [PATCH 003/101] fixed some warnings and changed the type of nsid in struct zns_info from int to unsigned --- src/m1/device.cpp | 6 +- src/m23-ftl/m2.cpp | 123 +++++++++++++------------- src/m23-ftl/m3.cpp | 180 ++++++++++++++++++++------------------- src/m23-ftl/zns_device.h | 5 +- 4 files changed, 160 insertions(+), 154 deletions(-) diff --git a/src/m1/device.cpp b/src/m1/device.cpp index 92a49a8..aa15fd4 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -238,12 +238,12 @@ int ss_nvme_device_io_with_mdts(int fd, uint32_t nsid, uint64_t slba, uint16_t n while((errno == 0) && (completed_size < buf_size)) { uint64_t size = buf_size-completed_size < mdts_size ? buf_size-completed_size : mdts_size; int no_blocks = floor(size/lba_size); - memcpy(temp, buffer+(iteration*mdts_size), size); + memcpy(temp, (char *)buffer+(iteration*mdts_size), size); if (!read) errno = ss_nvme_device_write(fd, nsid, current_lba, no_blocks, temp, size); if (read) { errno = ss_nvme_device_read(fd,nsid,current_lba,no_blocks,temp,size); - memcpy(buffer+(iteration*mdts_size),temp, size); + memcpy((char *)buffer+(iteration*mdts_size),temp, size); } completed_size += size; current_lba += no_blocks; @@ -317,7 +317,7 @@ uint64_t get_mdts_size(int fd){ //Identify MPSMIN void *regs; regs = mmap(NULL,getpagesize(),PROT_READ,MAP_SHARED,fd,0); - mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs+0)); + mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs)); size = pow(2,mpsmin) * pow(2,ctrl.mdts); return size; diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index cc694e2..b36588a 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -32,50 +32,52 @@ SOFTWARE. extern "C" { -static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test){ +static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test) +{ int ret = -1; uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; - if(max_lba_entries < max_lbas_to_test){ + if (max_lba_entries < max_lbas_to_test) { printf("Error: not sufficient LBAs available, pass a smaller number \n"); return -1; } - const uint64_t max_lba_to_generate = (max_lba_entries - max_lbas_to_test); + const uint64_t max_lba_to_generate = max_lba_entries - max_lbas_to_test; // lets pick a random start offset - const uint64_t start_lba = (0 + (rand() % (max_lba_to_generate - 0))); + const uint64_t start_lba = 0 + rand() % (max_lba_to_generate - 0); // now starting from "s" lba, we are going to write out max_lbas_to_test LBAs - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - ith iteration - write_pattern_with_start((char*) buf, buf_size, i); - ret = zns_udevice_write(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ + write_pattern_with_start((char *)buf, buf_size, i); + ret = zns_udevice_write(my_dev, i * my_dev->lba_size_bytes, buf, buf_size); + if (ret) { printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + i * my_dev->lba_size_bytes, i - start_lba); return ret; } } printf("Writing of %lu unique LBAs OK \n", max_lbas_to_test); // otherwise all writes passed - now we test reading - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - bzero((char*) buf, buf_size); - ret = zns_udevice_read(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ + bzero((char *)buf, buf_size); + ret = zns_udevice_read(my_dev, i * my_dev->lba_size_bytes, buf, buf_size); + if (ret) { printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + i * my_dev->lba_size_bytes, i - start_lba); return ret; } // now we match - for ith pattern - if it fails it asserts - match_pattern_with_start((char*) buf, buf_size, i); + match_pattern_with_start((char *)buf, buf_size, i); } printf("Reading and matching of %lu unique LBAs OK \n", max_lbas_to_test); return 0; } -static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size){ - write_pattern((char*) buf, buf_size); +static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size) +{ + write_pattern((char *)buf, buf_size); uint64_t test_lba = 0; int ret = zns_udevice_write(dev, test_lba, buf, buf_size); - if(ret != 0){ + if (ret) { printf("Error: writing the device failed at address 0x%lx \n", test_lba); return ret; } @@ -83,16 +85,17 @@ static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_ // zero it out bzero(buf, buf_size); ret = zns_udevice_read(dev, test_lba, buf, buf_size); - if(ret != 0){ + if (ret) { printf("Error: reading the device failed at address 0x%lx \n", test_lba); return ret; } printf("%u bytes read successfully on lba 0x%lx \n", buf_size, test_lba); - match_pattern((char*) buf, buf_size); + match_pattern((char*)buf, buf_size); return 0; } -static int show_help(){ +static int show_help() +{ printf("Usage: m2 -d device_name -h -r \n"); printf("-d : /dev/nvmeXpY - in this format with the full path \n"); printf("-r : resume if the FTL can. \n"); @@ -101,12 +104,13 @@ static int show_help(){ return 0; } -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ uint64_t start, end; start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); + srand((unsigned)time(NULL) * getpid()); int ret, c; - char *zns_device_name = (char*) "nvme0n1", *test_buf = nullptr, *str1 = nullptr; + char *zns_device_name = (char *)"nvme0n1", *test_buf = nullptr, *str1 = nullptr; struct user_zns_device *my_dev = nullptr; struct zdev_init_params params; params.force_reset = true; @@ -119,54 +123,53 @@ int main(int argc, char **argv) { printf("===================================================================================== \n"); while ((c = getopt(argc, argv, "l:d:hr")) != -1) { switch (c) { - case 'h': - show_help(); - exit(0); - case 'r': - params.force_reset = false; - break; - case 'd': - str1 = strdupa(optarg); - if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); - exit(EXIT_FAILURE); - } - for (int j = 1; ; j++) { - char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { - break; - } - // if there was a valid parse, just save it - zns_device_name = token; - } - free(str1); - break; - case 'l': - params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); - exit(-1); - } - break; - default: - show_help(); + case 'h': + show_help(); + exit(0); + case 'r': + params.force_reset = false; + break; + case 'd': + str1 = strdupa(optarg); + if (!str1) { + printf("Could not parse the arguments for the device %s '\n", optarg); + exit(EXIT_FAILURE); + } + for (int j = 1; ; ++j) { + char *token = strsep(&str1, "/"); // delimited is "/" + if (token == nullptr) + break; + // if there was a valid parse, just save it + zns_device_name = token; + } + free(str1); + break; + case 'l': + params.log_zones = atoi(optarg); + if (params.log_zones < 3) { + printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); exit(-1); + } + break; + default: + show_help(); + exit(-1); } } params.name = strdup(zns_device_name); printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s\n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no"); + params.name,params.log_zones, params.gc_wmark, params.force_reset == 1 ? "yes" : "no"); ret = init_ss_zns_device(¶ms, &my_dev); assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); max_num_lba_to_test = (params.log_zones - 1) * (my_dev->tparams.zns_zone_capacity / my_dev->tparams.zns_lba_size); printf("The amount of new pages to be written would be the number of (zones - 1) / lba_size : %lu \n", max_num_lba_to_test); printf("Why? we assume one zone will eventually be taken for writing metadata, and the rest will be used for the FTL log \n"); test_buf = static_cast(calloc(1, my_dev->lba_size_bytes)); int t1 = write_read_lba0(my_dev, test_buf, my_dev->lba_size_bytes); // -1 because we have already written one LBA. - int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, (max_num_lba_to_test - 1)); + int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, max_num_lba_to_test - 1); free(test_buf); ret = deinit_ss_zns_device(my_dev); free(params.name); @@ -176,7 +179,7 @@ int main(int argc, char **argv) { printf("[stosys-result] Test 1 (write, read, and match on LBA0) : %s \n", (t1 == 0 ? " Passed" : " Failed")); printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); + printf("[stosys-stats] The elapsed time is %lu milliseconds \n", (end - start) / 1000); printf("====================================================================\n"); return ret; } diff --git a/src/m23-ftl/m3.cpp b/src/m23-ftl/m3.cpp index 8b3aed3..3585a09 100644 --- a/src/m23-ftl/m3.cpp +++ b/src/m23-ftl/m3.cpp @@ -36,53 +36,53 @@ SOFTWARE. #include "../common/utils.h" -static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) { +static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) +{ std::vector myvector; std::random_device rd; std::mt19937 g(rd()); uint64_t *tmp = nullptr; // set some values: - for (uint64_t i = 0; i < capacity; i++) { + for (uint64_t i = 0; i < capacity; ++i) myvector.push_back(i); - } - if(shuffle) { + if (shuffle) std::shuffle(myvector.begin(), myvector.end(), g); - } tmp = new uint64_t[capacity]; - for(uint64_t i = 0; i < capacity; i++){ + for (uint64_t i = 0; i < capacity; ++i) tmp[i] = myvector[i]; - } *arr = tmp; return 0; } extern "C" { -static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read){ +static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read) +{ int ret; uint64_t written_so_far = 0; - uintptr_t ptr = (uintptr_t) buf; - while (written_so_far < (uint64_t) sz) { - if(is_read == 1) { - ret = pread(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } else { - ret = pwrite(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } - if(ret < 0){ + uintptr_t ptr = (uintptr_t)buf; + while (written_so_far < (uint64_t)sz) { + if (is_read == 1) + ret = pread(fd, (void *)(ptr + written_so_far), sz - written_so_far, offset + written_so_far); + else + ret = pwrite(fd, (void *)(ptr + written_so_far), sz - written_so_far, offset + written_so_far); + if (ret < 0) { printf("file writing failed %d \n", ret); return ret; } //other add and move along - written_so_far+=ret; + written_so_far += ret; } return 0; } -static int write_complete_file(int fd, uint64_t offset, void *buf, int sz){ +static int write_complete_file(int fd, uint64_t offset, void *buf, int sz) +{ return _complete_file_io(fd, offset, buf, sz, 0); } -static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ +static int read_complete_file(int fd, uint64_t offset, void *buf, int sz) +{ return _complete_file_io(fd, offset, buf, sz, 1); } @@ -98,11 +98,12 @@ static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ */ static int wr_full_device_verify(struct user_zns_device *dev, const uint64_t *addr_list, const uint32_t list_size, - const uint32_t max_hammer_io){ + const uint32_t max_hammer_io) +{ int ret; const char *tmp_file = "./tmp-output-fulld"; - char *b1 = (char*) calloc(1, dev->lba_size_bytes); - char *b2 = (char*) calloc(1, dev->lba_size_bytes); + char *b1 = (char *)calloc(1, dev->lba_size_bytes); + char *b2 = (char *)calloc(1, dev->lba_size_bytes); assert(b1 != nullptr); assert(b2 != nullptr); @@ -114,7 +115,7 @@ static int wr_full_device_verify(struct user_zns_device *dev, } // allocate this side file to the full capacity ret = posix_fallocate(fd, 0, dev->capacity_bytes); - if(ret){ + if (ret) { printf("Error: fallocate failed, ret %d ", ret); return -1; } @@ -123,38 +124,38 @@ static int wr_full_device_verify(struct user_zns_device *dev, const int min = 0; const int max = dev->lba_size_bytes; //initialize the device, otherwise we may have indexes where there is random garbage in both cases - for(uint32_t i = 0; i < list_size; i++){ - uint64_t woffset = (addr_list[i]) * dev->lba_size_bytes; + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t woffset = addr_list[i] * dev->lba_size_bytes; //random offset within the page and just write some random stuff = this is to make a unique I/O pattern - b1[(min + (rand() % (max - min)))] = (char) rand(); + b1[min + rand() % (max - min)] = (char)rand(); // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ + if (ret) { printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); goto done; } ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ + if (ret) { printf("Error: file writing failed at offset 0x%lx \n", woffset); goto done; } } printf("the ZNS user device has been written (ONCE) completely OK\n"); - if(max_hammer_io > 0){ + if (max_hammer_io > 0) { printf("Hammering some random LBAs %d times \n", max_hammer_io); - for(uint32_t i = 0; i < max_hammer_io; i++){ + for (uint32_t i = 0; i < max_hammer_io; ++i) { // we should not generate offset which is within the list_size - uint64_t woffset = (addr_list[ 0 + (rand() % (list_size - 0))]) * dev->lba_size_bytes; + uint64_t woffset = addr_list[0 + rand() % (list_size - 0)] * dev->lba_size_bytes; //random offset within the page and just write some random stuff, like i - b1[(min + (rand() % (max - min)))] = (char) rand(); + b1[min + rand() % (max - min)] = (char)rand(); // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ + if (ret) { printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); goto done; } ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ + if (ret) { printf("Error: file writing failed at offset 0x%lx \n", woffset); goto done; } @@ -166,21 +167,22 @@ static int wr_full_device_verify(struct user_zns_device *dev, write_pattern(b1, dev->lba_size_bytes); write_pattern(b2, dev->lba_size_bytes); // and now read the whole device and compare the content with the file - for(uint32_t i = 0; i < list_size; i++){ - uint64_t roffset = (addr_list[i]) * dev->lba_size_bytes; + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t roffset = addr_list[i] * dev->lba_size_bytes; // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_read(dev, roffset, b1, dev->lba_size_bytes); assert(ret == 0); ret = read_complete_file(fd, roffset, b2, dev->lba_size_bytes); assert(ret == 0); //now both of these should match - for(uint32_t j = 0; j < dev->lba_size_bytes; j++) - if(b1[j] != b2[j]){ + for (uint32_t j = 0; j < dev->lba_size_bytes; ++j) { + if (b1[j] != b2[j]) { printf("ERROR: buffer mismatch at i %d and j %d , address is 0%lx expecting %x found %x \n", i, j, roffset, b2[j], b1[j]); ret = -EINVAL; goto done; } + } } printf("Verification passed on the while device \n"); @@ -189,13 +191,13 @@ static int wr_full_device_verify(struct user_zns_device *dev, free(b2); close(fd); ret = remove(tmp_file); - if(ret != 0){ + if (ret) printf("Error: file deleting failed with ret %d \n", ret); - } return ret; } -static int show_help(){ +static int show_help() +{ printf("Usage: m2 -d device_name -h -r \n"); printf("-d : /dev/nvmeXpY - in this format with the full path \n"); printf("-r : resume if the FTL can. \n"); @@ -206,12 +208,13 @@ static int show_help(){ return 0; } -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ uint64_t start, end; start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); + srand((unsigned)time(NULL) * getpid()); int ret, c; - char *zns_device_name = (char*) "nvme0n1", *str1 = nullptr; + char *zns_device_name = (char *)"nvme0n1", *str1 = nullptr; struct user_zns_device *my_dev = nullptr; uint64_t *seq_addresses = nullptr, *random_addresses = nullptr; uint32_t to_hammer_lba = 10000; @@ -227,58 +230,57 @@ int main(int argc, char **argv) { printf("===================================================================================== \n"); while ((c = getopt(argc, argv, "o:m:l:d:w:hr")) != -1) { switch (c) { - case 'h': - show_help(); - exit(0); - case 'r': - params.force_reset = false; - break; - case 'o': - to_hammer_lba = atoi(optarg); - break; - case 'd': - str1 = strdupa(optarg); - if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); - exit(EXIT_FAILURE); - } - for (int j = 1; ; j++) { - char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { - break; - } - // if there was a valid parse, just save it - zns_device_name = token; - } - free(str1); - break; - case 'l': - params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); - exit(-1); - } - break; - case 'w': - params.gc_wmark = atoi(optarg); - if (params.gc_wmark < 1){ - printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); - exit(-1); - } - break; - default: - show_help(); + case 'h': + show_help(); + exit(0); + case 'r': + params.force_reset = false; + break; + case 'o': + to_hammer_lba = atoi(optarg); + break; + case 'd': + str1 = strdupa(optarg); + if (!str1) { + printf("Could not parse the arguments for the device %s '\n", optarg); + exit(EXIT_FAILURE); + } + for (int j = 1; ; ++j) { + char *token = strsep(&str1, "/"); // delimited is "/" + if (token == nullptr) + break; + // if there was a valid parse, just save it + zns_device_name = token; + } + free(str1); + break; + case 'l': + params.log_zones = atoi(optarg); + if (params.log_zones < 3) { + printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); exit(-1); + } + break; + case 'w': + params.gc_wmark = atoi(optarg); + if (params.gc_wmark < 1) { + printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); + exit(-1); + } + break; + default: + show_help(); + exit(-1); } } params.name = strdup(zns_device_name); printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s hammer-time %d \n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no", to_hammer_lba); + params.name, params.log_zones, params.gc_wmark, params.force_reset == 1 ? "yes" : "no", to_hammer_lba); ret = init_ss_zns_device(¶ms, &my_dev); assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; // get a sequential LBA address list get_sequence_as_array(max_lba_entries, &seq_addresses, false); @@ -302,7 +304,7 @@ int main(int argc, char **argv) { printf("[stosys-result] Test 2 randomized write, read, and match (full device) : %s \n", (t2 == 0 ? " Passed" : " Failed")); printf("[stosys-result] Test 3 randomized write, read, and match (full device, hammer %-6u) : %s \n", to_hammer_lba, (t3 == 0 ? " Passed" : " Failed")); printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); + printf("[stosys-stats] The elapsed time is %lu milliseconds \n", (end - start) / 1000); printf("====================================================================\n"); return ret; } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index e56469a..6556843 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -24,6 +24,7 @@ SOFTWARE. #define STOSYS_PROJECT_ZNS_DEVICE_H #include +#include extern "C"{ //https://github.com/mplulu/google-breakpad/issues/481 - taken from here @@ -51,7 +52,7 @@ struct user_zns_device { void *_private; }; -struct zdev_init_params{ +struct zdev_init_params { char *name; int log_zones; int gc_wmark; @@ -67,7 +68,7 @@ struct log { struct zns_info { int fd; - int nsid; + unsigned nsid; struct log *log_head; struct log *log_end; }; From a6791c3c78c60df7ef56002f8c42c1e5c074def3 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Thu, 15 Sep 2022 22:06:30 +0000 Subject: [PATCH 004/101] revised some minor misplace errors --- src/m23-ftl/m2.cpp | 2 +- src/m23-ftl/zns_device.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index b36588a..1b124a9 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -90,7 +90,7 @@ static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_ return ret; } printf("%u bytes read successfully on lba 0x%lx \n", buf_size, test_lba); - match_pattern((char*)buf, buf_size); + match_pattern((char *)buf, buf_size); return 0; } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 6556843..ac09086 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -24,7 +24,6 @@ SOFTWARE. #define STOSYS_PROJECT_ZNS_DEVICE_H #include -#include extern "C"{ //https://github.com/mplulu/google-breakpad/issues/481 - taken from here From ddfed4c365b47b47af793c58a3a4a032d86eb83f Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 16 Sep 2022 07:14:25 +0000 Subject: [PATCH 005/101] changed log to metadata --- src/m23-ftl/zns_device.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index ac09086..533cf06 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -25,7 +25,7 @@ SOFTWARE. #include -extern "C"{ +extern "C" { //https://github.com/mplulu/google-breakpad/issues/481 - taken from here #define typeof __typeof__ #define container_of(ptr, type, member) ({ \ @@ -58,18 +58,18 @@ struct zdev_init_params { bool force_reset; }; -struct log { +struct metadata { uint64_t logical_address; uint64_t physical_address; - struct log *prev; - struct log *next; + struct metadata *prev; + struct metadata *next; }; struct zns_info { int fd; - unsigned nsid; - struct log *log_head; - struct log *log_end; + uint32_t nsid; + struct metadata *metadata_head; + struct metadata *metadata_end; }; int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); From 740b432a8b9a4f791515ab6851b6c307d83d3aff Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 16 Sep 2022 20:40:57 +0000 Subject: [PATCH 006/101] init function update --- src/m23-ftl/zns_device.cpp | 68 ++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 23b805f..417195f 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -20,23 +20,79 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "zns_device.h" + +#include +#include +#include #include +#include +#include "zns_device.h" extern "C" { -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { - return -ENOSYS; +int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) +{ + *my_dev = (struct user_zns_device *)calloc(1, sizeof(struct user_zns_device)); + (*my_dev)->_private = calloc(1, sizeof(struct zns_info)); + struct zns_info *info = (struct zns_info *)(*my_dev)->_private; + // get fd + info->fd = nvme_open(params->name); + if (info->fd < 0) { + printf("Device %s opened failed %d errno %d\n", params->name, info->fd, errno); + return 1; + } + // get nsid + int ret = nvme_get_nsid(info->fd, &info->nsid); + if (ret) { + printf("Error: failed to retrieve the namespace id %d\n", ret); + return 1; + } + // reset device + if (params->force_reset) { + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0, true, NVME_ZNS_ZSA_RESET, 0, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return 1; + } + } + // get zns_lba_size + struct nvme_id_ns ns; + ret = nvme_identify_ns(info->fd, info->nsid, &ns); + if (ret) { + printf("Error: failed to retrieve the nvme identify namespace %d\n", ret); + return 1; + } + (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; + (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; + // get zns_zone_capacity + struct nvme_zns_id_ns data; + nvme_zns_identify_ns(info->fd, info->nsid, &data); + (*my_dev)->tparams.zns_zone_capacity = data.lbafe[ns.flbas & 0xF].zsze * (*my_dev)->tparams.zns_lba_size; + (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_zone_capacity; + // get zns_num_zones + struct nvme_zone_report zns_report; + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, sizeof(zns_report), &zns_report); + if (ret) { + printf("Failed to report zones, ret %d \n", ret); + return 1; + } + (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); + return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) +{ return -ENOSYS; } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) +{ return -ENOSYS; } -int deinit_ss_zns_device(struct user_zns_device *my_dev){ +int deinit_ss_zns_device(struct user_zns_device *my_dev) +{ + free(my_dev->_private); + free(my_dev); return -ENOSYS; } } From bfad1c58bdbb43773ffd4daaafe751abbac100e7 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Fri, 16 Sep 2022 20:41:35 +0000 Subject: [PATCH 007/101] Changes in zns_info structure --- src/m23-ftl/zns_device.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 533cf06..ccc2d5a 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -25,6 +25,8 @@ SOFTWARE. #include +#define METADATA_MAP_LEN 4000 + extern "C" { //https://github.com/mplulu/google-breakpad/issues/481 - taken from here #define typeof __typeof__ @@ -48,7 +50,7 @@ struct user_zns_device { uint64_t capacity_bytes; // total user device capacity struct zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions // your own private data - void *_private; + void *_private; //Points to zns_info }; struct zdev_init_params { @@ -58,20 +60,33 @@ struct zdev_init_params { bool force_reset; }; -struct metadata { + +struct metadata_log_map { + //FIXME: Add No of blocks written as well. uint64_t logical_address; uint64_t physical_address; - struct metadata *prev; - struct metadata *next; + struct metadata_log_map *next; }; struct zns_info { + //Fixed values int fd; + int gc_trigger; uint32_t nsid; - struct metadata *metadata_head; - struct metadata *metadata_end; + uint32_t nvm_page_size; + uint32_t zone_capacity; + uint32_t no_of_zones; + uint32_t no_of_log_zones; + uint64_t upper_logical_addr_bound; + + //Log zone maintainance + uint32_t no_of_used_log_zones; //Keep track of used log zones + uint64_t curr_log_zone_starting_addr; //Point to current log zone starting address + struct metadata_log_map map[METADATA_LOG_LEN]; //Hashmap to store log }; + + int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); From 7245aedbd1b6b78332d94f8beb4c24a00867deaa Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Fri, 16 Sep 2022 20:43:50 +0000 Subject: [PATCH 008/101] [WIP] Implement hash funciton and resolved conflicts --- src/m23-ftl/zns_device.cpp | 75 +++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 417195f..b96160b 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -80,13 +80,74 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) -{ - return -ENOSYS; + + +int hash_function(uint64_t key, int index) { + } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) -{ - return -ENOSYS; + +void update_log_map(metadata_log_map map, uint64_t logical_addr, uint64_t physical_addr) { + int index = hash_function(logical_addr); + +} + +int lookup_log_map(metadata_log_map map, uint64_t logical_addr, uint64_t *physical_addr) { + +} + +int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t zslba, uint64_t *addr_written) { + int errno; + void *mbuffer = NULL; + long long mbuffer_size = 0; + uint32_t number_of_pages; //calc from size and page_size + //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. + errno = nvme_zns_append(ptr->fd, ptr->nsid, ptr->zslba, number_of_pages, 0, + 0, 0, 0, size, buffer, mbuffer_size, mbuffer, addr_written); + ss_nvme_show_status(errno); + return errno; +} + + +int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t size) { + int errno; + void *mbuffer = NULL; + long long mbuffer_size = 0; + uint32_t number_of_pages; + errno = nvme_read(ptr->fd, ptr->nsid, address, number_of_pages, 0, 0, 0, + 0, 0, size, buffer, mbuffer_size, mbuffer); + ss_nvme_show_status(errno); + return errno; +} + + +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ + int errno; + uint64_t physical_addr; + + //FIXME: Proision for contiguos block read, but not written contiguous + //Get physical addr mapped for the provided logical addr + errno = lookup_map(my_dev->_private->map, address, &physical_addr); + if(errno != 0) + return errno; + + + errno = read_data_from_nvme(my_dev->_private, physical_addr, buffer, size); + + return errno; +} + + +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ + int errno; + uint64_t *zone_slba, *physical_page_addr; + + errno = append_data_to_log_zone(my_dec->_private, buffer, size, physical_page_addr); + if(errno != 0) + return 0; + + update_ftl_map(my_dec->_private->metadata_log_map, address, physical_page_addr); + return errno; +>>>>>>> Stashed changes } int deinit_ss_zns_device(struct user_zns_device *my_dev) @@ -95,4 +156,6 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) free(my_dev); return -ENOSYS; } + + } From 75457f989fc0d891d9f97cc1f1679fb14d9d3514 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Fri, 16 Sep 2022 20:50:27 +0000 Subject: [PATCH 009/101] removal of 'stashed chages' --- src/m23-ftl/zns_device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index b96160b..0287f59 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -147,7 +147,6 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *bu update_ftl_map(my_dec->_private->metadata_log_map, address, physical_page_addr); return errno; ->>>>>>> Stashed changes } int deinit_ss_zns_device(struct user_zns_device *my_dev) From 23b91f8e94f401d0716ee1f5257e96ac934aee44 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 16 Sep 2022 21:58:02 +0000 Subject: [PATCH 010/101] modified init function --- src/m23-ftl/zns_device.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 0287f59..f6ccc1d 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -35,6 +35,10 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * *my_dev = (struct user_zns_device *)calloc(1, sizeof(struct user_zns_device)); (*my_dev)->_private = calloc(1, sizeof(struct zns_info)); struct zns_info *info = (struct zns_info *)(*my_dev)->_private; + // get gc_trigger + info->gc_trigger = params->gc_wmark; + // get no_of_log_zones + info->no_of_log_zones = params->log_zones; // get fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -55,7 +59,7 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return 1; } } - // get zns_lba_size + // get zns_lba_size lba_size_bytes nvm_page_size struct nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); if (ret) { @@ -64,12 +68,14 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * } (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; - // get zns_zone_capacity + info->nvm_page_size = (*my_dev)->tparams.zns_lba_size; + // get zns_zone_capacity capacity_bytes zones_capacity struct nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); (*my_dev)->tparams.zns_zone_capacity = data.lbafe[ns.flbas & 0xF].zsze * (*my_dev)->tparams.zns_lba_size; (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_zone_capacity; - // get zns_num_zones + info->zone_capacity = (*my_dev)->tparams.zns_zone_capacity; + // get zns_num_zones no_of_zones struct nvme_zone_report zns_report; ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, sizeof(zns_report), &zns_report); if (ret) { @@ -77,6 +83,13 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return 1; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); + info->no_of_zones = (*my_dev)->tparams.zns_num_zones; + // set no_of_used_log_zones + info->no_of_used_log_zones = 0; + // set curr_log_zone_starting_addr + info->curr_log_zone_starting_addr = 0; + // init upper_logical_addr_bound + // init map return 0; } From 723984ebddbce1c86386e0aab4fe2b2aec55496e Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Fri, 16 Sep 2022 23:40:27 +0000 Subject: [PATCH 011/101] Added hash function support, read and support --- src/m23-ftl/zns_device.cpp | 93 +++++++++++++++++++++++++++----------- src/m23-ftl/zns_device.h | 11 ++++- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index f6ccc1d..9b8b8f1 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -95,32 +95,65 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * -int hash_function(uint64_t key, int index) { - +int hash_function(uint64_t key) { + return key%METADATA_LOG_MAP_LEN; } -void update_log_map(metadata_log_map map, uint64_t logical_addr, uint64_t physical_addr) { - int index = hash_function(logical_addr); - +void update_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t physical_address) { + int index = hash_function(logical_address); + struct metadata_log_map *head, *entry; + + entry = (metadata_log_map *) malloc(sizeof(metadata_log_map)); + entry->physical_address = physical_address; + entry->logical_address = logical_address; + if(map[index] == NULL) + map[index] = entry; + else { + head = map[index]; + while(head->next != NULL) + head = head->next; + head->next = entry; + } } -int lookup_log_map(metadata_log_map map, uint64_t logical_addr, uint64_t *physical_addr) { +int lookup_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t *physical_address) { + int index = hash_function(logical_address); + struct metadata_log_map *head; + int err; + err = -1; + head = map[index]; + while(head != NULL) { + if(head->logical_address == logical_address) { + *physical_address = head->physical_address; + err = 0; + break; + } + head = head->next; + } + return err; } -int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t zslba, uint64_t *addr_written) { +int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t *address_written) { int errno; void *mbuffer = NULL; long long mbuffer_size = 0; uint32_t number_of_pages; //calc from size and page_size //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. - errno = nvme_zns_append(ptr->fd, ptr->nsid, ptr->zslba, number_of_pages, 0, - 0, 0, 0, size, buffer, mbuffer_size, mbuffer, addr_written); - ss_nvme_show_status(errno); + errno = nvme_zns_append(ptr->fd, ptr->nsid, ptr->curr_log_zone_starting_addr, number_of_pages, 0, + 0, 0, 0, size, buffer, mbuffer_size, mbuffer, (long long unsigned int*) address_written); + //ss_nvme_show_status(errno); return errno; } +//FIXME: Update log zone if current zone cant support current write req +/* +int check_update_curr_log_zone_validity(zns_info *ptr, uint32_t size) { + int errno; + if ptr +} +*/ int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t size) { int errno; void *mbuffer = NULL; @@ -128,38 +161,46 @@ int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t uint32_t number_of_pages; errno = nvme_read(ptr->fd, ptr->nsid, address, number_of_pages, 0, 0, 0, 0, 0, size, buffer, mbuffer_size, mbuffer); - ss_nvme_show_status(errno); + //ss_nvme_show_status(errno); return errno; } int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - int errno; - uint64_t physical_addr; - + int err; + uint64_t *physical_address; + zns_info *info; + info = (zns_info *) my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr - errno = lookup_map(my_dev->_private->map, address, &physical_addr); - if(errno != 0) - return errno; + err = lookup_log_map(info->map, address, physical_address); + if(err != 0) + return err; - errno = read_data_from_nvme(my_dev->_private, physical_addr, buffer, size); + errno = read_data_from_nvme(info, *physical_address, buffer, size); - return errno; + return err; } int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - int errno; - uint64_t *zone_slba, *physical_page_addr; - - errno = append_data_to_log_zone(my_dec->_private, buffer, size, physical_page_addr); + int err; + uint64_t *physical_page_address; + zns_info *info; + info = (zns_info *) my_dev->_private; + /* + errno = check_update_curr_log_zone_validity(my_dev->_private) if(errno != 0) - return 0; + return 0; + */ + + err = append_data_to_log_zone(info, buffer, size, physical_page_address); + if(err != 0) + return err; - update_ftl_map(my_dec->_private->metadata_log_map, address, physical_page_addr); - return errno; + update_log_map(info->map, address, *physical_page_address); + return err; } int deinit_ss_zns_device(struct user_zns_device *my_dev) diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index ccc2d5a..2e2300f 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -25,7 +25,7 @@ SOFTWARE. #include -#define METADATA_MAP_LEN 4000 +#define METADATA_LOG_MAP_LEN 4000 extern "C" { //https://github.com/mplulu/google-breakpad/issues/481 - taken from here @@ -77,15 +77,22 @@ struct zns_info { uint32_t zone_capacity; uint32_t no_of_zones; uint32_t no_of_log_zones; + //Future use uint64_t upper_logical_addr_bound; //Log zone maintainance uint32_t no_of_used_log_zones; //Keep track of used log zones uint64_t curr_log_zone_starting_addr; //Point to current log zone starting address - struct metadata_log_map map[METADATA_LOG_LEN]; //Hashmap to store log + struct metadata_log_map *map[METADATA_LOG_MAP_LEN]; //Hashmap to store log }; +int hash_function(uint64_t key); +void update_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t physical_address); +int lookup_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t *physical_address); +int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t *address_written); + + int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); From 2e137d1c7ddc223164535c45756bd1894daa719e Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sat, 17 Sep 2022 18:12:49 +0000 Subject: [PATCH 012/101] Working patch 1 --- src/m23-ftl/zns_device.cpp | 77 ++++++++++++++++++++++++++------------ src/m23-ftl/zns_device.h | 3 +- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 9b8b8f1..a89f592 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -73,7 +73,7 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * struct nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); (*my_dev)->tparams.zns_zone_capacity = data.lbafe[ns.flbas & 0xF].zsze * (*my_dev)->tparams.zns_lba_size; - (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_zone_capacity; + (*my_dev)->capacity_bytes = ((*my_dev)->tparams.zns_num_zones - (info->no_of_log_zones))*(*my_dev)->tparams.zns_zone_capacity; //FIXME: Capacity bytes is (total_no_zones - log_zones) * zone_size; info->zone_capacity = (*my_dev)->tparams.zns_zone_capacity; // get zns_num_zones no_of_zones struct nvme_zone_report zns_report; @@ -90,6 +90,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->curr_log_zone_starting_addr = 0; // init upper_logical_addr_bound // init map + // + info->no_of_pages_per_zone = info->zone_capacity/info->nvm_page_size; return 0; } @@ -101,18 +103,28 @@ int hash_function(uint64_t key) { void update_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t physical_address) { int index = hash_function(logical_address); - struct metadata_log_map *head, *entry; - + + struct metadata_log_map *entry; entry = (metadata_log_map *) malloc(sizeof(metadata_log_map)); entry->physical_address = physical_address; entry->logical_address = logical_address; + entry->next = NULL; + + //Fill in hashmap if(map[index] == NULL) map[index] = entry; + else if(map[index]->logical_address == logical_address) + map[index] = entry; else { + struct metadata_log_map *head; head = map[index]; - while(head->next != NULL) + while(head->next != NULL) { + //Break if next entry is same logical address + if (head->next->logical_address == logical_address) + break; head = head->next; - head->next = entry; + } + head->next = entry; } } @@ -138,7 +150,7 @@ int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t int errno; void *mbuffer = NULL; long long mbuffer_size = 0; - uint32_t number_of_pages; //calc from size and page_size + uint32_t number_of_pages = (size/ptr->nvm_page_size)-1; //calc from size and page_size //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. errno = nvme_zns_append(ptr->fd, ptr->nsid, ptr->curr_log_zone_starting_addr, number_of_pages, 0, 0, 0, 0, size, buffer, mbuffer_size, mbuffer, (long long unsigned int*) address_written); @@ -158,7 +170,7 @@ int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t int errno; void *mbuffer = NULL; long long mbuffer_size = 0; - uint32_t number_of_pages; + uint32_t number_of_pages = (size/ptr->nvm_page_size) - 1; errno = nvme_read(ptr->fd, ptr->nsid, address, number_of_pages, 0, 0, 0, 0, 0, size, buffer, mbuffer_size, mbuffer); //ss_nvme_show_status(errno); @@ -166,19 +178,25 @@ int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t } + +void check_to_trigger_GC(struct zns_info *info, uint64_t last_log_append_addr) { + //Check if current log zone is ended, then change to next log zone + if((last_log_append_addr - info->curr_log_zone_starting_addr) == info->no_of_pages_per_zone - 1) + info->curr_log_zone_starting_addr = last_log_append_addr + 1; +} + int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ int err; - uint64_t *physical_address; + uint64_t physical_address; zns_info *info; info = (zns_info *) my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr - err = lookup_log_map(info->map, address, physical_address); + err = lookup_log_map(info->map, address, &physical_address); if(err != 0) return err; - - errno = read_data_from_nvme(info, *physical_address, buffer, size); + errno = read_data_from_nvme(info, physical_address, buffer, size); return err; } @@ -186,28 +204,41 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buf int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ int err; - uint64_t *physical_page_address; + uint64_t physical_page_address; zns_info *info; info = (zns_info *) my_dev->_private; - /* - errno = check_update_curr_log_zone_validity(my_dev->_private) - if(errno != 0) - return 0; - */ - - err = append_data_to_log_zone(info, buffer, size, physical_page_address); + err = append_data_to_log_zone(info, buffer, size, &physical_page_address); if(err != 0) return err; - - update_log_map(info->map, address, *physical_page_address); + check_to_trigger_GC(info, physical_page_address); + update_log_map(info->map, address, physical_page_address); return err; } +void clear_entry(struct metadata_log_map *entry) { + if(entry == NULL) + return; + clear_entry(entry->next); + free(entry); + return; +} + +void free_hashmap(struct metadata_log_map *map[METADATA_LOG_MAP_LEN]) { + for(int i = 0; i < METADATA_LOG_MAP_LEN; i++) + clear_entry(map[i]); +} + int deinit_ss_zns_device(struct user_zns_device *my_dev) { - free(my_dev->_private); + int err; + struct zns_info *info; + info = (zns_info *) my_dev->_private; + + //free hashmap + free_hashmap(info->map); + free(info); free(my_dev); - return -ENOSYS; + return err; } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 2e2300f..2552636 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -25,7 +25,7 @@ SOFTWARE. #include -#define METADATA_LOG_MAP_LEN 4000 +#define METADATA_LOG_MAP_LEN 9999 extern "C" { //https://github.com/mplulu/google-breakpad/issues/481 - taken from here @@ -75,6 +75,7 @@ struct zns_info { uint32_t nsid; uint32_t nvm_page_size; uint32_t zone_capacity; + uint32_t no_of_pages_per_zone; uint32_t no_of_zones; uint32_t no_of_log_zones; //Future use From 6f6896a39529dc77e60439dfe6685dfe8ff89f01 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 18 Sep 2022 10:37:27 +0000 Subject: [PATCH 013/101] changed the definition of user_zns_device::capacity_bytes --- src/m23-ftl/m2.cpp | 125 +++++++------ src/m23-ftl/m3.cpp | 180 +++++++++---------- src/m23-ftl/zns_device.cpp | 357 +++++++++++++++++++------------------ src/m23-ftl/zns_device.h | 40 +---- 4 files changed, 336 insertions(+), 366 deletions(-) diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index 1b124a9..b0f3f4d 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -32,52 +32,50 @@ SOFTWARE. extern "C" { -static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test) -{ +static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test){ int ret = -1; uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; - if (max_lba_entries < max_lbas_to_test) { + if(max_lba_entries < max_lbas_to_test){ printf("Error: not sufficient LBAs available, pass a smaller number \n"); return -1; } - const uint64_t max_lba_to_generate = max_lba_entries - max_lbas_to_test; + const uint64_t max_lba_to_generate = (max_lba_entries - max_lbas_to_test); // lets pick a random start offset - const uint64_t start_lba = 0 + rand() % (max_lba_to_generate - 0); + const uint64_t start_lba = (0 + (rand() % (max_lba_to_generate - 0))); // now starting from "s" lba, we are going to write out max_lbas_to_test LBAs - for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { + for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ // make a unique pattern for each write - ith iteration - write_pattern_with_start((char *)buf, buf_size, i); - ret = zns_udevice_write(my_dev, i * my_dev->lba_size_bytes, buf, buf_size); - if (ret) { + write_pattern_with_start((char*) buf, buf_size, i); + ret = zns_udevice_write(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); + if(ret != 0){ printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - i * my_dev->lba_size_bytes, i - start_lba); + (i * my_dev->lba_size_bytes), (i - start_lba)); return ret; } } printf("Writing of %lu unique LBAs OK \n", max_lbas_to_test); // otherwise all writes passed - now we test reading - for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { + for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ // make a unique pattern for each write - bzero((char *)buf, buf_size); - ret = zns_udevice_read(my_dev, i * my_dev->lba_size_bytes, buf, buf_size); - if (ret) { + bzero((char*) buf, buf_size); + ret = zns_udevice_read(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); + if(ret != 0){ printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - i * my_dev->lba_size_bytes, i - start_lba); + (i * my_dev->lba_size_bytes), (i - start_lba)); return ret; } // now we match - for ith pattern - if it fails it asserts - match_pattern_with_start((char *)buf, buf_size, i); + match_pattern_with_start((char*) buf, buf_size, i); } printf("Reading and matching of %lu unique LBAs OK \n", max_lbas_to_test); return 0; } -static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size) -{ - write_pattern((char *)buf, buf_size); +static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size){ + write_pattern((char*) buf, buf_size); uint64_t test_lba = 0; int ret = zns_udevice_write(dev, test_lba, buf, buf_size); - if (ret) { + if(ret != 0){ printf("Error: writing the device failed at address 0x%lx \n", test_lba); return ret; } @@ -85,17 +83,16 @@ static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_ // zero it out bzero(buf, buf_size); ret = zns_udevice_read(dev, test_lba, buf, buf_size); - if (ret) { + if(ret != 0){ printf("Error: reading the device failed at address 0x%lx \n", test_lba); return ret; } printf("%u bytes read successfully on lba 0x%lx \n", buf_size, test_lba); - match_pattern((char *)buf, buf_size); + match_pattern((char*) buf, buf_size); return 0; } -static int show_help() -{ +static int show_help(){ printf("Usage: m2 -d device_name -h -r \n"); printf("-d : /dev/nvmeXpY - in this format with the full path \n"); printf("-r : resume if the FTL can. \n"); @@ -104,13 +101,12 @@ static int show_help() return 0; } -int main(int argc, char **argv) -{ +int main(int argc, char **argv) { uint64_t start, end; start = microseconds_since_epoch(); - srand((unsigned)time(NULL) * getpid()); + srand( (unsigned) time(NULL) * getpid()); int ret, c; - char *zns_device_name = (char *)"nvme0n1", *test_buf = nullptr, *str1 = nullptr; + char *zns_device_name = (char*) "nvme0n1", *test_buf = nullptr, *str1 = nullptr; struct user_zns_device *my_dev = nullptr; struct zdev_init_params params; params.force_reset = true; @@ -123,53 +119,54 @@ int main(int argc, char **argv) printf("===================================================================================== \n"); while ((c = getopt(argc, argv, "l:d:hr")) != -1) { switch (c) { - case 'h': - show_help(); - exit(0); - case 'r': - params.force_reset = false; - break; - case 'd': - str1 = strdupa(optarg); - if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); - exit(EXIT_FAILURE); - } - for (int j = 1; ; ++j) { - char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) - break; - // if there was a valid parse, just save it - zns_device_name = token; - } - free(str1); - break; - case 'l': - params.log_zones = atoi(optarg); - if (params.log_zones < 3) { - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + case 'h': + show_help(); + exit(0); + case 'r': + params.force_reset = false; + break; + case 'd': + str1 = strdupa(optarg); + if (!str1) { + printf("Could not parse the arguments for the device %s '\n", optarg); + exit(EXIT_FAILURE); + } + for (int j = 1; ; j++) { + char *token = strsep(&str1, "/"); // delimited is "/" + if (token == nullptr) { + break; + } + // if there was a valid parse, just save it + zns_device_name = token; + } + free(str1); + break; + case 'l': + params.log_zones = atoi(optarg); + if (params.log_zones < 3){ + printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + exit(-1); + } + break; + default: + show_help(); exit(-1); - } - break; - default: - show_help(); - exit(-1); } } params.name = strdup(zns_device_name); printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s\n", - params.name,params.log_zones, params.gc_wmark, params.force_reset == 1 ? "yes" : "no"); + params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no"); ret = init_ss_zns_device(¶ms, &my_dev); assert (ret == 0); - assert(my_dev->lba_size_bytes); - assert(my_dev->capacity_bytes); + assert(my_dev->lba_size_bytes != 0); + assert(my_dev->capacity_bytes != 0); max_num_lba_to_test = (params.log_zones - 1) * (my_dev->tparams.zns_zone_capacity / my_dev->tparams.zns_lba_size); printf("The amount of new pages to be written would be the number of (zones - 1) / lba_size : %lu \n", max_num_lba_to_test); printf("Why? we assume one zone will eventually be taken for writing metadata, and the rest will be used for the FTL log \n"); test_buf = static_cast(calloc(1, my_dev->lba_size_bytes)); int t1 = write_read_lba0(my_dev, test_buf, my_dev->lba_size_bytes); // -1 because we have already written one LBA. - int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, max_num_lba_to_test - 1); + int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, (max_num_lba_to_test - 1)); free(test_buf); ret = deinit_ss_zns_device(my_dev); free(params.name); @@ -179,8 +176,8 @@ int main(int argc, char **argv) printf("[stosys-result] Test 1 (write, read, and match on LBA0) : %s \n", (t1 == 0 ? " Passed" : " Failed")); printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", (end - start) / 1000); + printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); printf("====================================================================\n"); return ret; } -} \ No newline at end of file +} diff --git a/src/m23-ftl/m3.cpp b/src/m23-ftl/m3.cpp index 3585a09..8b3aed3 100644 --- a/src/m23-ftl/m3.cpp +++ b/src/m23-ftl/m3.cpp @@ -36,53 +36,53 @@ SOFTWARE. #include "../common/utils.h" -static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) -{ +static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) { std::vector myvector; std::random_device rd; std::mt19937 g(rd()); uint64_t *tmp = nullptr; // set some values: - for (uint64_t i = 0; i < capacity; ++i) + for (uint64_t i = 0; i < capacity; i++) { myvector.push_back(i); - if (shuffle) + } + if(shuffle) { std::shuffle(myvector.begin(), myvector.end(), g); + } tmp = new uint64_t[capacity]; - for (uint64_t i = 0; i < capacity; ++i) + for(uint64_t i = 0; i < capacity; i++){ tmp[i] = myvector[i]; + } *arr = tmp; return 0; } extern "C" { -static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read) -{ +static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read){ int ret; uint64_t written_so_far = 0; - uintptr_t ptr = (uintptr_t)buf; - while (written_so_far < (uint64_t)sz) { - if (is_read == 1) - ret = pread(fd, (void *)(ptr + written_so_far), sz - written_so_far, offset + written_so_far); - else - ret = pwrite(fd, (void *)(ptr + written_so_far), sz - written_so_far, offset + written_so_far); - if (ret < 0) { + uintptr_t ptr = (uintptr_t) buf; + while (written_so_far < (uint64_t) sz) { + if(is_read == 1) { + ret = pread(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); + } else { + ret = pwrite(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); + } + if(ret < 0){ printf("file writing failed %d \n", ret); return ret; } //other add and move along - written_so_far += ret; + written_so_far+=ret; } return 0; } -static int write_complete_file(int fd, uint64_t offset, void *buf, int sz) -{ +static int write_complete_file(int fd, uint64_t offset, void *buf, int sz){ return _complete_file_io(fd, offset, buf, sz, 0); } -static int read_complete_file(int fd, uint64_t offset, void *buf, int sz) -{ +static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ return _complete_file_io(fd, offset, buf, sz, 1); } @@ -98,12 +98,11 @@ static int read_complete_file(int fd, uint64_t offset, void *buf, int sz) */ static int wr_full_device_verify(struct user_zns_device *dev, const uint64_t *addr_list, const uint32_t list_size, - const uint32_t max_hammer_io) -{ + const uint32_t max_hammer_io){ int ret; const char *tmp_file = "./tmp-output-fulld"; - char *b1 = (char *)calloc(1, dev->lba_size_bytes); - char *b2 = (char *)calloc(1, dev->lba_size_bytes); + char *b1 = (char*) calloc(1, dev->lba_size_bytes); + char *b2 = (char*) calloc(1, dev->lba_size_bytes); assert(b1 != nullptr); assert(b2 != nullptr); @@ -115,7 +114,7 @@ static int wr_full_device_verify(struct user_zns_device *dev, } // allocate this side file to the full capacity ret = posix_fallocate(fd, 0, dev->capacity_bytes); - if (ret) { + if(ret){ printf("Error: fallocate failed, ret %d ", ret); return -1; } @@ -124,38 +123,38 @@ static int wr_full_device_verify(struct user_zns_device *dev, const int min = 0; const int max = dev->lba_size_bytes; //initialize the device, otherwise we may have indexes where there is random garbage in both cases - for (uint32_t i = 0; i < list_size; ++i) { - uint64_t woffset = addr_list[i] * dev->lba_size_bytes; + for(uint32_t i = 0; i < list_size; i++){ + uint64_t woffset = (addr_list[i]) * dev->lba_size_bytes; //random offset within the page and just write some random stuff = this is to make a unique I/O pattern - b1[min + rand() % (max - min)] = (char)rand(); + b1[(min + (rand() % (max - min)))] = (char) rand(); // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if (ret) { + if(ret != 0){ printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); goto done; } ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if (ret) { + if(ret != 0){ printf("Error: file writing failed at offset 0x%lx \n", woffset); goto done; } } printf("the ZNS user device has been written (ONCE) completely OK\n"); - if (max_hammer_io > 0) { + if(max_hammer_io > 0){ printf("Hammering some random LBAs %d times \n", max_hammer_io); - for (uint32_t i = 0; i < max_hammer_io; ++i) { + for(uint32_t i = 0; i < max_hammer_io; i++){ // we should not generate offset which is within the list_size - uint64_t woffset = addr_list[0 + rand() % (list_size - 0)] * dev->lba_size_bytes; + uint64_t woffset = (addr_list[ 0 + (rand() % (list_size - 0))]) * dev->lba_size_bytes; //random offset within the page and just write some random stuff, like i - b1[min + rand() % (max - min)] = (char)rand(); + b1[(min + (rand() % (max - min)))] = (char) rand(); // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if (ret) { + if(ret != 0){ printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); goto done; } ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if (ret) { + if(ret != 0){ printf("Error: file writing failed at offset 0x%lx \n", woffset); goto done; } @@ -167,22 +166,21 @@ static int wr_full_device_verify(struct user_zns_device *dev, write_pattern(b1, dev->lba_size_bytes); write_pattern(b2, dev->lba_size_bytes); // and now read the whole device and compare the content with the file - for (uint32_t i = 0; i < list_size; ++i) { - uint64_t roffset = addr_list[i] * dev->lba_size_bytes; + for(uint32_t i = 0; i < list_size; i++){ + uint64_t roffset = (addr_list[i]) * dev->lba_size_bytes; // now we need to write the buffer in parallel to the zns device, and the file ret = zns_udevice_read(dev, roffset, b1, dev->lba_size_bytes); assert(ret == 0); ret = read_complete_file(fd, roffset, b2, dev->lba_size_bytes); assert(ret == 0); //now both of these should match - for (uint32_t j = 0; j < dev->lba_size_bytes; ++j) { - if (b1[j] != b2[j]) { + for(uint32_t j = 0; j < dev->lba_size_bytes; j++) + if(b1[j] != b2[j]){ printf("ERROR: buffer mismatch at i %d and j %d , address is 0%lx expecting %x found %x \n", i, j, roffset, b2[j], b1[j]); ret = -EINVAL; goto done; } - } } printf("Verification passed on the while device \n"); @@ -191,13 +189,13 @@ static int wr_full_device_verify(struct user_zns_device *dev, free(b2); close(fd); ret = remove(tmp_file); - if (ret) + if(ret != 0){ printf("Error: file deleting failed with ret %d \n", ret); + } return ret; } -static int show_help() -{ +static int show_help(){ printf("Usage: m2 -d device_name -h -r \n"); printf("-d : /dev/nvmeXpY - in this format with the full path \n"); printf("-r : resume if the FTL can. \n"); @@ -208,13 +206,12 @@ static int show_help() return 0; } -int main(int argc, char **argv) -{ +int main(int argc, char **argv) { uint64_t start, end; start = microseconds_since_epoch(); - srand((unsigned)time(NULL) * getpid()); + srand( (unsigned) time(NULL) * getpid()); int ret, c; - char *zns_device_name = (char *)"nvme0n1", *str1 = nullptr; + char *zns_device_name = (char*) "nvme0n1", *str1 = nullptr; struct user_zns_device *my_dev = nullptr; uint64_t *seq_addresses = nullptr, *random_addresses = nullptr; uint32_t to_hammer_lba = 10000; @@ -230,57 +227,58 @@ int main(int argc, char **argv) printf("===================================================================================== \n"); while ((c = getopt(argc, argv, "o:m:l:d:w:hr")) != -1) { switch (c) { - case 'h': - show_help(); - exit(0); - case 'r': - params.force_reset = false; - break; - case 'o': - to_hammer_lba = atoi(optarg); - break; - case 'd': - str1 = strdupa(optarg); - if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); - exit(EXIT_FAILURE); - } - for (int j = 1; ; ++j) { - char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) - break; - // if there was a valid parse, just save it - zns_device_name = token; - } - free(str1); - break; - case 'l': - params.log_zones = atoi(optarg); - if (params.log_zones < 3) { - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + case 'h': + show_help(); + exit(0); + case 'r': + params.force_reset = false; + break; + case 'o': + to_hammer_lba = atoi(optarg); + break; + case 'd': + str1 = strdupa(optarg); + if (!str1) { + printf("Could not parse the arguments for the device %s '\n", optarg); + exit(EXIT_FAILURE); + } + for (int j = 1; ; j++) { + char *token = strsep(&str1, "/"); // delimited is "/" + if (token == nullptr) { + break; + } + // if there was a valid parse, just save it + zns_device_name = token; + } + free(str1); + break; + case 'l': + params.log_zones = atoi(optarg); + if (params.log_zones < 3){ + printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + exit(-1); + } + break; + case 'w': + params.gc_wmark = atoi(optarg); + if (params.gc_wmark < 1){ + printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); + exit(-1); + } + break; + default: + show_help(); exit(-1); - } - break; - case 'w': - params.gc_wmark = atoi(optarg); - if (params.gc_wmark < 1) { - printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); - exit(-1); - } - break; - default: - show_help(); - exit(-1); } } params.name = strdup(zns_device_name); printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s hammer-time %d \n", - params.name, params.log_zones, params.gc_wmark, params.force_reset == 1 ? "yes" : "no", to_hammer_lba); + params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no", to_hammer_lba); ret = init_ss_zns_device(¶ms, &my_dev); assert (ret == 0); - assert(my_dev->lba_size_bytes); - assert(my_dev->capacity_bytes); + assert(my_dev->lba_size_bytes != 0); + assert(my_dev->capacity_bytes != 0); uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; // get a sequential LBA address list get_sequence_as_array(max_lba_entries, &seq_addresses, false); @@ -304,7 +302,7 @@ int main(int argc, char **argv) printf("[stosys-result] Test 2 randomized write, read, and match (full device) : %s \n", (t2 == 0 ? " Passed" : " Failed")); printf("[stosys-result] Test 3 randomized write, read, and match (full device, hammer %-6u) : %s \n", to_hammer_lba, (t3 == 0 ? " Passed" : " Failed")); printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", (end - start) / 1000); + printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); printf("====================================================================\n"); return ret; } diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a89f592..9676c60 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -20,9 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include #include +#include #include #include #include @@ -30,216 +30,225 @@ SOFTWARE. extern "C" { -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) +enum {METADATA_MAP_LEN = 9999}; + +struct metadata_map { + //FIXME: Add No of blocks written as well. + uint64_t logical_addr; + unsigned long long physical_addr; + metadata_map *next; +}; +struct zns_info { + // Fixed values + // int num_log_zones; + // int gc_trigger; + int fd; + unsigned nsid; + unsigned long long zone_num_pages; + // uint64_t upper_logical_addr_bound; + // Log zone maintainance + // uint32_t no_of_used_log_zones; // Keep track of used log zones + unsigned long long curr_log_zone_saddr; // Point to current log zone starting address + metadata_map *map[METADATA_MAP_LEN]; // Hashmap to store log +}; + + +static inline int hash_function(uint64_t key) +{ + return key % METADATA_MAP_LEN; +} + +static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_addr) +{ + //Check if current log zone is ended, then change to next log zone + if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) + info->curr_log_zone_saddr = last_append_addr + 1; +} + +static int lookup_map(metadata_map *map[METADATA_MAP_LEN], + uint64_t logical_addr, unsigned long long *physical_addr) +{ + int index = hash_function(logical_addr); + metadata_map *head = map[index]; + while (head) { + if (head->logical_addr == logical_addr) { + *physical_addr = head->physical_addr; + return 0; + } + head = head->next; + } + return 1; +} + +static void update_map(metadata_map *map[METADATA_MAP_LEN], + uint64_t logical_addr, unsigned long long physical_addr) +{ + int index = hash_function(logical_addr); + //Fill in hashmap + if (map[index] == NULL) { + metadata_map *entry = (metadata_map *)calloc(1, sizeof(metadata_map)); + entry->logical_addr = logical_addr; + entry->physical_addr = physical_addr; + map[index] = entry; + return; + } + if (map[index]->logical_addr == logical_addr) { + map[index]->physical_addr = physical_addr; + return; + } + metadata_map *head = map[index]; + while (head->next) { + if (head->next->logical_addr == logical_addr) { + head->physical_addr = physical_addr; + return; + } + head = head->next; + } + metadata_map *entry = (metadata_map *)calloc(1, sizeof(metadata_map)); + entry->logical_addr = logical_addr; + entry->physical_addr = physical_addr; + head->next = entry; +} + +static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, + void *buffer, uint32_t size) +{ + // void *metadata = NULL; + // unsigned metadata_len = 0; + unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; + zns_info *info = (zns_info *)my_dev->_private; + nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, + 0, 0, 0, 0, 0, size, buffer, 0, NULL); + //ss_nvme_show_status(errno); + return errno; +} + +static int append_to_log_zone(user_zns_device *my_dev, unsigned long long *physical_addr, + void *buffer, uint32_t size) { - *my_dev = (struct user_zns_device *)calloc(1, sizeof(struct user_zns_device)); - (*my_dev)->_private = calloc(1, sizeof(struct zns_info)); - struct zns_info *info = (struct zns_info *)(*my_dev)->_private; - // get gc_trigger - info->gc_trigger = params->gc_wmark; - // get no_of_log_zones - info->no_of_log_zones = params->log_zones; - // get fd + // void *metadata = NULL; + // unsigned metadata_len = 0; + unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; //calc from size and page_size + //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. + zns_info *info = (zns_info *)my_dev->_private; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone_saddr, number_of_pages, + 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); + //ss_nvme_show_status(errno); + return errno; +} + +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) +{ + *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); + (*my_dev)->_private = calloc(1, sizeof(zns_info)); + zns_info *info = (zns_info *)(*my_dev)->_private; + // set num_log_zones + // info->num_log_zones = params->log_zones; + // set gc_trigger + // info->gc_trigger = params->gc_wmark; + // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { - printf("Device %s opened failed %d errno %d\n", params->name, info->fd, errno); - return 1; + printf("Dev %s opened failed %d\n", params->name, info->fd); + return errno; } - // get nsid + // set nsid int ret = nvme_get_nsid(info->fd, &info->nsid); if (ret) { printf("Error: failed to retrieve the namespace id %d\n", ret); - return 1; + return ret; } // reset device if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0, true, NVME_ZNS_ZSA_RESET, 0, NULL); + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0, true, + NVME_ZNS_ZSA_RESET, 0, NULL); if (ret) { printf("Zone reset failed %d\n", ret); - return 1; + return ret; } } - // get zns_lba_size lba_size_bytes nvm_page_size - struct nvme_id_ns ns; + // set zns_lba_size + nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); if (ret) { - printf("Error: failed to retrieve the nvme identify namespace %d\n", ret); - return 1; + printf("Failed to retrieve the nvme identify namespace %d\n", ret); + return ret; } (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; + // set lba_size_bytes (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; - info->nvm_page_size = (*my_dev)->tparams.zns_lba_size; - // get zns_zone_capacity capacity_bytes zones_capacity - struct nvme_zns_id_ns data; - nvme_zns_identify_ns(info->fd, info->nsid, &data); - (*my_dev)->tparams.zns_zone_capacity = data.lbafe[ns.flbas & 0xF].zsze * (*my_dev)->tparams.zns_lba_size; - (*my_dev)->capacity_bytes = ((*my_dev)->tparams.zns_num_zones - (info->no_of_log_zones))*(*my_dev)->tparams.zns_zone_capacity; //FIXME: Capacity bytes is (total_no_zones - log_zones) * zone_size; - info->zone_capacity = (*my_dev)->tparams.zns_zone_capacity; - // get zns_num_zones no_of_zones - struct nvme_zone_report zns_report; - ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, sizeof(zns_report), &zns_report); + // set zns_num_zones + nvme_zone_report zns_report; + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, + NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, + sizeof(zns_report), &zns_report); if (ret) { - printf("Failed to report zones, ret %d \n", ret); - return 1; + printf("Failed to report zones, ret %d\n", ret); + return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->no_of_zones = (*my_dev)->tparams.zns_num_zones; - // set no_of_used_log_zones - info->no_of_used_log_zones = 0; - // set curr_log_zone_starting_addr - info->curr_log_zone_starting_addr = 0; + // set zone_num_pages + nvme_zns_id_ns data; + nvme_zns_identify_ns(info->fd, info->nsid, &data); + info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = #page_per_zone * zone_size + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + (*my_dev)->tparams.zns_lba_size; + // set capacity_bytes = #zone * zone_capacity + (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_num_zones * + (*my_dev)->tparams.zns_zone_capacity; // init upper_logical_addr_bound - // init map - // - info->no_of_pages_per_zone = info->zone_capacity/info->nvm_page_size; return 0; } - - -int hash_function(uint64_t key) { - return key%METADATA_LOG_MAP_LEN; -} - -void update_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t physical_address) { - int index = hash_function(logical_address); - - struct metadata_log_map *entry; - entry = (metadata_log_map *) malloc(sizeof(metadata_log_map)); - entry->physical_address = physical_address; - entry->logical_address = logical_address; - entry->next = NULL; - - //Fill in hashmap - if(map[index] == NULL) - map[index] = entry; - else if(map[index]->logical_address == logical_address) - map[index] = entry; - else { - struct metadata_log_map *head; - head = map[index]; - while(head->next != NULL) { - //Break if next entry is same logical address - if (head->next->logical_address == logical_address) - break; - head = head->next; - } - head->next = entry; - } -} - -int lookup_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t *physical_address) { - int index = hash_function(logical_address); - struct metadata_log_map *head; - int err; - err = -1; - head = map[index]; - while(head != NULL) { - if(head->logical_address == logical_address) { - *physical_address = head->physical_address; - err = 0; - break; - } - head = head->next; - } - - return err; -} - -int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t *address_written) { - int errno; - void *mbuffer = NULL; - long long mbuffer_size = 0; - uint32_t number_of_pages = (size/ptr->nvm_page_size)-1; //calc from size and page_size - //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. - errno = nvme_zns_append(ptr->fd, ptr->nsid, ptr->curr_log_zone_starting_addr, number_of_pages, 0, - 0, 0, 0, size, buffer, mbuffer_size, mbuffer, (long long unsigned int*) address_written); - //ss_nvme_show_status(errno); - return errno; -} - - -//FIXME: Update log zone if current zone cant support current write req -/* -int check_update_curr_log_zone_validity(zns_info *ptr, uint32_t size) { - int errno; - if ptr -} -*/ -int read_data_from_nvme(zns_info *ptr, uint64_t address, void *buffer, uint32_t size) { - int errno; - void *mbuffer = NULL; - long long mbuffer_size = 0; - uint32_t number_of_pages = (size/ptr->nvm_page_size) - 1; - errno = nvme_read(ptr->fd, ptr->nsid, address, number_of_pages, 0, 0, 0, - 0, 0, size, buffer, mbuffer_size, mbuffer); - //ss_nvme_show_status(errno); - return errno; -} - - - -void check_to_trigger_GC(struct zns_info *info, uint64_t last_log_append_addr) { - //Check if current log zone is ended, then change to next log zone - if((last_log_append_addr - info->curr_log_zone_starting_addr) == info->no_of_pages_per_zone - 1) - info->curr_log_zone_starting_addr = last_log_append_addr + 1; -} - -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - int err; - uint64_t physical_address; - zns_info *info; - info = (zns_info *) my_dev->_private; +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + unsigned long long physical_addr = 0; + zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr - err = lookup_log_map(info->map, address, &physical_address); - if(err != 0) - return err; - - errno = read_data_from_nvme(info, physical_address, buffer, size); - - return err; -} - - -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - int err; - uint64_t physical_page_address; - zns_info *info; - info = (zns_info *) my_dev->_private; - err = append_data_to_log_zone(info, buffer, size, &physical_page_address); - if(err != 0) - return err; - check_to_trigger_GC(info, physical_page_address); - update_log_map(info->map, address, physical_page_address); - return err; -} - -void clear_entry(struct metadata_log_map *entry) { - if(entry == NULL) - return; - clear_entry(entry->next); - free(entry); - return; + int ret = lookup_map(info->map, address, &physical_addr); + if (ret) + return ret; + read_from_nvme(my_dev, physical_addr, buffer, size); + return errno; } -void free_hashmap(struct metadata_log_map *map[METADATA_LOG_MAP_LEN]) { - for(int i = 0; i < METADATA_LOG_MAP_LEN; i++) - clear_entry(map[i]); +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + unsigned long long physical_addr = 0; + zns_info *info = (zns_info *)my_dev->_private; + int ret = append_to_log_zone(my_dev, &physical_addr, buffer, size); + if (ret) + return ret; + check_to_trigger_GC(info, physical_addr); + update_map(info->map, address, physical_addr); + return 0; } int deinit_ss_zns_device(struct user_zns_device *my_dev) { - int err; - struct zns_info *info; - info = (zns_info *) my_dev->_private; - + metadata_map **map = ((zns_info *)my_dev->_private)->map; //free hashmap - free_hashmap(info->map); - free(info); + for (int i = 0; i < METADATA_MAP_LEN; ++i) { + while (map[i]) { + metadata_map *tmp = map[i]; + map[i] = map[i]->next; + free(tmp); + } + } + free(my_dev->_private); free(my_dev); - return err; + return 0; } +//FIXME: Update log zone if current zone cant support current write req +/* +static int check_update_curr_log_zone_validity(zns_info *info, uint32_t size) { + if info +} +*/ } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 2552636..69c9f10 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -24,10 +24,10 @@ SOFTWARE. #define STOSYS_PROJECT_ZNS_DEVICE_H #include - -#define METADATA_LOG_MAP_LEN 9999 +#include extern "C" { + //https://github.com/mplulu/google-breakpad/issues/481 - taken from here #define typeof __typeof__ #define container_of(ptr, type, member) ({ \ @@ -60,45 +60,11 @@ struct zdev_init_params { bool force_reset; }; - -struct metadata_log_map { - //FIXME: Add No of blocks written as well. - uint64_t logical_address; - uint64_t physical_address; - struct metadata_log_map *next; -}; - -struct zns_info { - //Fixed values - int fd; - int gc_trigger; - uint32_t nsid; - uint32_t nvm_page_size; - uint32_t zone_capacity; - uint32_t no_of_pages_per_zone; - uint32_t no_of_zones; - uint32_t no_of_log_zones; - //Future use - uint64_t upper_logical_addr_bound; - - //Log zone maintainance - uint32_t no_of_used_log_zones; //Keep track of used log zones - uint64_t curr_log_zone_starting_addr; //Point to current log zone starting address - struct metadata_log_map *map[METADATA_LOG_MAP_LEN]; //Hashmap to store log -}; - - -int hash_function(uint64_t key); -void update_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t physical_address); -int lookup_log_map(metadata_log_map *map[METADATA_LOG_MAP_LEN], uint64_t logical_address, uint64_t *physical_address); -int append_data_to_log_zone(zns_info *ptr, void *buffer, uint32_t size, uint64_t *address_written); - - - int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); int deinit_ss_zns_device(struct user_zns_device *my_dev); + }; #endif //STOSYS_PROJECT_ZNS_DEVICE_H From d807c7dbbe914d1b718d387374d2bb4eebda8465 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 18 Sep 2022 16:11:38 +0000 Subject: [PATCH 014/101] restore the prototype of init_ss_zns_device --- src/m23-ftl/zns_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 9676c60..ba7dd0b 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -137,7 +137,7 @@ static int append_to_log_zone(user_zns_device *my_dev, unsigned long long *physi return errno; } -int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) +int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1, sizeof(zns_info)); From 51d796cf406df4c2c47662251a7e06f7925f05c5 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 19 Sep 2022 15:29:24 +0000 Subject: [PATCH 015/101] Changed cpacity bytes defintion and added few more parameters --- src/m23-ftl/zns_device.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 9676c60..59eca1f 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -38,6 +38,7 @@ struct metadata_map { unsigned long long physical_addr; metadata_map *next; }; + struct zns_info { // Fixed values // int num_log_zones; @@ -45,9 +46,12 @@ struct zns_info { int fd; unsigned nsid; unsigned long long zone_num_pages; + uint32_t no_of_zones; // uint64_t upper_logical_addr_bound; + // Log zone maintainance - // uint32_t no_of_used_log_zones; // Keep track of used log zones + uint32_t no_of_used_log_zones; // Keep track of used log zones + uint32_t no_of_log_zones; unsigned long long curr_log_zone_saddr; // Point to current log zone starting address metadata_map *map[METADATA_MAP_LEN]; // Hashmap to store log }; @@ -60,9 +64,12 @@ static inline int hash_function(uint64_t key) static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_addr) { + //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition //Check if current log zone is ended, then change to next log zone - if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) + if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) { + info->no_of_used_log_zones ++; info->curr_log_zone_saddr = last_append_addr + 1; + } } static int lookup_map(metadata_map *map[METADATA_MAP_LEN], @@ -187,6 +194,8 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); + info->no_of_zones = (*my_dev)->tparams.zns_num_zones; + // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); @@ -195,8 +204,9 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * (*my_dev)->tparams.zns_lba_size; // set capacity_bytes = #zone * zone_capacity - (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_num_zones * + (*my_dev)->capacity_bytes = ((*my_dev)->tparams.zns_num_zones - params->log_zones) * (*my_dev)->tparams.zns_zone_capacity; + info->no_of_log_zones = params->log_zones; // init upper_logical_addr_bound return 0; } @@ -243,12 +253,4 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) free(my_dev); return 0; } - -//FIXME: Update log zone if current zone cant support current write req -/* -static int check_update_curr_log_zone_validity(zns_info *info, uint32_t size) { - if info -} -*/ - } From ba31649d884831d5440f61c8e0792bf2f685e96d Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 19 Sep 2022 15:43:45 +0000 Subject: [PATCH 016/101] add read cache --- src/m23-ftl/zns_device.cpp | 83 +++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index ba7dd0b..ec60265 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -24,18 +24,23 @@ SOFTWARE. #include #include #include +#include #include #include #include "zns_device.h" extern "C" { -enum {METADATA_MAP_LEN = 9999}; +enum {METADATA_MAP_LEN = 9999, BUF_SIZE = 128 * 4096}; +static uint32_t used_buf_size = 0; // #lba struct metadata_map { //FIXME: Add No of blocks written as well. uint64_t logical_addr; unsigned long long physical_addr; + void *data; + uint32_t size; + uint64_t count; metadata_map *next; }; struct zns_info { @@ -65,14 +70,51 @@ static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_a info->curr_log_zone_saddr = last_append_addr + 1; } +static void update_cache(metadata_map *map[METADATA_MAP_LEN], metadata_map *metadata, + void *buf, uint32_t size, uint32_t count_threshold) +{ + metadata->data = NULL; + metadata->size = 0; + if (size <= BUF_SIZE - used_buf_size) { + metadata->data = calloc(1, size); + memcpy(metadata->data, buf, size); + metadata->size = size; + used_buf_size += size; + return; + } + for (int i = 0; i < METADATA_MAP_LEN; ++i) { + for (metadata_map *head = map[i]; head; head = head->next) { + if (head->count < count_threshold && head->size >= size) { + free(head->data); + head->data = NULL; + used_buf_size -= head->size; + head->size = 0; + metadata->data = calloc(1, size); + memcpy(metadata->data, buf, size); + metadata->size = size; + used_buf_size += size; + return; + } + } + } +} + static int lookup_map(metadata_map *map[METADATA_MAP_LEN], - uint64_t logical_addr, unsigned long long *physical_addr) + uint64_t logical_addr, unsigned long long *physical_addr, + void *buf, uint32_t size, bool *get) { int index = hash_function(logical_addr); metadata_map *head = map[index]; while (head) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; + ++head->count; + if (head->size) { + memcpy(buf, head->data, size); + *get = true; + } else { + update_cache(map, head, buf, size, head->count); + } return 0; } head = head->next; @@ -81,33 +123,40 @@ static int lookup_map(metadata_map *map[METADATA_MAP_LEN], } static void update_map(metadata_map *map[METADATA_MAP_LEN], - uint64_t logical_addr, unsigned long long physical_addr) + uint64_t logical_addr, unsigned long long physical_addr, + void *buf, uint32_t size) { int index = hash_function(logical_addr); //Fill in hashmap if (map[index] == NULL) { - metadata_map *entry = (metadata_map *)calloc(1, sizeof(metadata_map)); - entry->logical_addr = logical_addr; - entry->physical_addr = physical_addr; - map[index] = entry; + map[index] = (metadata_map *)calloc(1, sizeof(metadata_map)); + map[index]->logical_addr = logical_addr; + map[index]->physical_addr = physical_addr; + update_cache(map, map[index], buf, size, 1); return; } if (map[index]->logical_addr == logical_addr) { map[index]->physical_addr = physical_addr; + free(map[index]->data); + used_buf_size -= map[index]->size; + update_cache(map, map[index], buf, size, 1); return; } metadata_map *head = map[index]; while (head->next) { if (head->next->logical_addr == logical_addr) { - head->physical_addr = physical_addr; + head->next->physical_addr = physical_addr; + free(head->next->data); + used_buf_size -= head->next->size; + update_cache(map, head->next, buf, size, 1); return; } head = head->next; } - metadata_map *entry = (metadata_map *)calloc(1, sizeof(metadata_map)); - entry->logical_addr = logical_addr; - entry->physical_addr = physical_addr; - head->next = entry; + head->next = (metadata_map *)calloc(1, sizeof(metadata_map)); + head->next->logical_addr = logical_addr; + head->next->physical_addr = physical_addr; + update_cache(map, head->next, buf, size, 1); } static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, @@ -208,10 +257,12 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr - int ret = lookup_map(info->map, address, &physical_addr); + bool get = false; + int ret = lookup_map(info->map, address, &physical_addr, buffer, size, &get); if (ret) return ret; - read_from_nvme(my_dev, physical_addr, buffer, size); + if (!get) + read_from_nvme(my_dev, physical_addr, buffer, size); return errno; } @@ -224,7 +275,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; check_to_trigger_GC(info, physical_addr); - update_map(info->map, address, physical_addr); + update_map(info->map, address, physical_addr, buffer, size); return 0; } @@ -234,6 +285,8 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) //free hashmap for (int i = 0; i < METADATA_MAP_LEN; ++i) { while (map[i]) { + if (map[i]->data) + free(map[i]->data); metadata_map *tmp = map[i]; map[i] = map[i]->next; free(tmp); From 3f92d2df0d48065a25e7d5c9f107bd3325f8c0ab Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 19 Sep 2022 16:12:24 +0000 Subject: [PATCH 017/101] turn off the read cache function --- src/m23-ftl/zns_device.cpp | 123 ++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 58 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index ec60265..fb8f26b 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -32,15 +32,16 @@ SOFTWARE. extern "C" { enum {METADATA_MAP_LEN = 9999, BUF_SIZE = 128 * 4096}; -static uint32_t used_buf_size = 0; // #lba +// enum {BUF_SIZE = 128 * 4096}; +// static uint32_t used_buf_size = 0; // #lba struct metadata_map { //FIXME: Add No of blocks written as well. uint64_t logical_addr; unsigned long long physical_addr; - void *data; - uint32_t size; - uint64_t count; + // void *data; + // uint32_t size; + // uint64_t count; metadata_map *next; }; struct zns_info { @@ -70,51 +71,53 @@ static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_a info->curr_log_zone_saddr = last_append_addr + 1; } -static void update_cache(metadata_map *map[METADATA_MAP_LEN], metadata_map *metadata, - void *buf, uint32_t size, uint32_t count_threshold) -{ - metadata->data = NULL; - metadata->size = 0; - if (size <= BUF_SIZE - used_buf_size) { - metadata->data = calloc(1, size); - memcpy(metadata->data, buf, size); - metadata->size = size; - used_buf_size += size; - return; - } - for (int i = 0; i < METADATA_MAP_LEN; ++i) { - for (metadata_map *head = map[i]; head; head = head->next) { - if (head->count < count_threshold && head->size >= size) { - free(head->data); - head->data = NULL; - used_buf_size -= head->size; - head->size = 0; - metadata->data = calloc(1, size); - memcpy(metadata->data, buf, size); - metadata->size = size; - used_buf_size += size; - return; - } - } - } -} +// static void update_cache(metadata_map *map[METADATA_MAP_LEN], metadata_map *metadata, +// void *buf, uint32_t size, uint32_t count_threshold) +// { +// metadata->data = NULL; +// metadata->size = 0; +// if (size <= BUF_SIZE - used_buf_size) { +// metadata->data = calloc(1, size); +// memcpy(metadata->data, buf, size); +// metadata->size = size; +// used_buf_size += size; +// return; +// } +// for (int i = 0; i < METADATA_MAP_LEN; ++i) { +// for (metadata_map *head = map[i]; head; head = head->next) { +// if (head->count < count_threshold && head->size >= size) { +// free(head->data); +// head->data = NULL; +// used_buf_size -= head->size; +// head->size = 0; +// metadata->data = calloc(1, size); +// memcpy(metadata->data, buf, size); +// metadata->size = size; +// used_buf_size += size; +// return; +// } +// } +// } +// } +// static int lookup_map(metadata_map *map[METADATA_MAP_LEN], +// uint64_t logical_addr, unsigned long long *physical_addr, +// void *buf, uint32_t size, bool *get) static int lookup_map(metadata_map *map[METADATA_MAP_LEN], - uint64_t logical_addr, unsigned long long *physical_addr, - void *buf, uint32_t size, bool *get) + uint64_t logical_addr, unsigned long long *physical_addr) { int index = hash_function(logical_addr); metadata_map *head = map[index]; while (head) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; - ++head->count; - if (head->size) { - memcpy(buf, head->data, size); - *get = true; - } else { - update_cache(map, head, buf, size, head->count); - } + // ++head->count; + // if (head->size) { + // memcpy(buf, head->data, size); + // *get = true; + // } else { + // update_cache(map, head, buf, size, head->count); + // } return 0; } head = head->next; @@ -122,9 +125,11 @@ static int lookup_map(metadata_map *map[METADATA_MAP_LEN], return 1; } +// static void update_map(metadata_map *map[METADATA_MAP_LEN], +// uint64_t logical_addr, unsigned long long physical_addr, +// void *buf, uint32_t size) static void update_map(metadata_map *map[METADATA_MAP_LEN], - uint64_t logical_addr, unsigned long long physical_addr, - void *buf, uint32_t size) + uint64_t logical_addr, unsigned long long physical_addr) { int index = hash_function(logical_addr); //Fill in hashmap @@ -132,23 +137,23 @@ static void update_map(metadata_map *map[METADATA_MAP_LEN], map[index] = (metadata_map *)calloc(1, sizeof(metadata_map)); map[index]->logical_addr = logical_addr; map[index]->physical_addr = physical_addr; - update_cache(map, map[index], buf, size, 1); + // update_cache(map, map[index], buf, size, 1); return; } if (map[index]->logical_addr == logical_addr) { map[index]->physical_addr = physical_addr; - free(map[index]->data); - used_buf_size -= map[index]->size; - update_cache(map, map[index], buf, size, 1); + // free(map[index]->data); + // used_buf_size -= map[index]->size; + // update_cache(map, map[index], buf, size, 1); return; } metadata_map *head = map[index]; while (head->next) { if (head->next->logical_addr == logical_addr) { head->next->physical_addr = physical_addr; - free(head->next->data); - used_buf_size -= head->next->size; - update_cache(map, head->next, buf, size, 1); + // free(head->next->data); + // used_buf_size -= head->next->size; + // update_cache(map, head->next, buf, size, 1); return; } head = head->next; @@ -156,7 +161,7 @@ static void update_map(metadata_map *map[METADATA_MAP_LEN], head->next = (metadata_map *)calloc(1, sizeof(metadata_map)); head->next->logical_addr = logical_addr; head->next->physical_addr = physical_addr; - update_cache(map, head->next, buf, size, 1); + // update_cache(map, head->next, buf, size, 1); } static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, @@ -257,12 +262,13 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr - bool get = false; - int ret = lookup_map(info->map, address, &physical_addr, buffer, size, &get); + // bool get = false; + // int ret = lookup_map(info->map, address, &physical_addr, buffer, size, &get); + int ret = lookup_map(info->map, address, &physical_addr); if (ret) return ret; - if (!get) - read_from_nvme(my_dev, physical_addr, buffer, size); + // if (!get) + read_from_nvme(my_dev, physical_addr, buffer, size); return errno; } @@ -275,7 +281,8 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; check_to_trigger_GC(info, physical_addr); - update_map(info->map, address, physical_addr, buffer, size); + // update_map(info->map, address, physical_addr, buffer, size); + update_map(info->map, address, physical_addr); return 0; } @@ -285,8 +292,8 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) //free hashmap for (int i = 0; i < METADATA_MAP_LEN; ++i) { while (map[i]) { - if (map[i]->data) - free(map[i]->data); + // if (map[i]->data) + // free(map[i]->data); metadata_map *tmp = map[i]; map[i] = map[i]->next; free(tmp); From 74a6db4678d941d3e4ce82bbe117873a75018f51 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 19 Sep 2022 16:22:47 +0000 Subject: [PATCH 018/101] turn off the read cache function --- src/m23-ftl/zns_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index fb8f26b..56fdc5a 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -33,7 +33,7 @@ extern "C" { enum {METADATA_MAP_LEN = 9999, BUF_SIZE = 128 * 4096}; // enum {BUF_SIZE = 128 * 4096}; -// static uint32_t used_buf_size = 0; // #lba +// static uint32_t used_buf_size = 0; struct metadata_map { //FIXME: Add No of blocks written as well. From eb66cdf9b39f5b9618d9cfaf86db6894d012c950 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 23 Sep 2022 09:24:33 +0000 Subject: [PATCH 019/101] change map's hash function --- src/m23-ftl/zns_device.cpp | 89 ++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 56fdc5a..1d968d2 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -31,7 +31,6 @@ SOFTWARE. extern "C" { -enum {METADATA_MAP_LEN = 9999, BUF_SIZE = 128 * 4096}; // enum {BUF_SIZE = 128 * 4096}; // static uint32_t used_buf_size = 0; @@ -44,34 +43,40 @@ struct metadata_map { // uint64_t count; metadata_map *next; }; + struct zns_info { // Fixed values - // int num_log_zones; - // int gc_trigger; int fd; unsigned nsid; + int num_log_zones; + int num_data_zones; + int gc_trigger; unsigned long long zone_num_pages; // uint64_t upper_logical_addr_bound; + // Log zone maintainance - // uint32_t no_of_used_log_zones; // Keep track of used log zones + uint32_t no_of_used_log_zones; // Keep track of used log zones unsigned long long curr_log_zone_saddr; // Point to current log zone starting address - metadata_map *map[METADATA_MAP_LEN]; // Hashmap to store log + metadata_map **map; // Hashmap to store log }; -static inline int hash_function(uint64_t key) +static inline int hash_function(uint64_t key, uint32_t zns_zone_capacity) { - return key % METADATA_MAP_LEN; + return key / zns_zone_capacity; } static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_addr) { + //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition //Check if current log zone is ended, then change to next log zone - if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) + if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) { + ++info->no_of_used_log_zones; info->curr_log_zone_saddr = last_append_addr + 1; + } } -// static void update_cache(metadata_map *map[METADATA_MAP_LEN], metadata_map *metadata, +// static void update_cache(zns_info *info, metadata_map *metadata, // void *buf, uint32_t size, uint32_t count_threshold) // { // metadata->data = NULL; @@ -83,8 +88,8 @@ static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_a // used_buf_size += size; // return; // } -// for (int i = 0; i < METADATA_MAP_LEN; ++i) { -// for (metadata_map *head = map[i]; head; head = head->next) { +// for (int i = 0; i < info->num_data_zones; ++i) { +// for (metadata_map *head = info->map[i]; head; head = head->next) { // if (head->count < count_threshold && head->size >= size) { // free(head->data); // head->data = NULL; @@ -100,14 +105,15 @@ static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_a // } // } -// static int lookup_map(metadata_map *map[METADATA_MAP_LEN], +// static int lookup_map(user_zns_device *my_dev, // uint64_t logical_addr, unsigned long long *physical_addr, // void *buf, uint32_t size, bool *get) -static int lookup_map(metadata_map *map[METADATA_MAP_LEN], +static int lookup_map(user_zns_device *my_dev, uint64_t logical_addr, unsigned long long *physical_addr) { - int index = hash_function(logical_addr); - metadata_map *head = map[index]; + int index = hash_function(logical_addr, my_dev->tparams.zns_zone_capacity); + zns_info *info = ((zns_info *)my_dev->_private); + metadata_map *head = info->map[index]; while (head) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; @@ -116,7 +122,7 @@ static int lookup_map(metadata_map *map[METADATA_MAP_LEN], // memcpy(buf, head->data, size); // *get = true; // } else { - // update_cache(map, head, buf, size, head->count); + // update_cache(info, head, buf, size, head->count); // } return 0; } @@ -125,26 +131,28 @@ static int lookup_map(metadata_map *map[METADATA_MAP_LEN], return 1; } -// static void update_map(metadata_map *map[METADATA_MAP_LEN], +// static void update_map(user_zns_device *my_dev, // uint64_t logical_addr, unsigned long long physical_addr, // void *buf, uint32_t size) -static void update_map(metadata_map *map[METADATA_MAP_LEN], +static void update_map(user_zns_device *my_dev, uint64_t logical_addr, unsigned long long physical_addr) { - int index = hash_function(logical_addr); + int index = hash_function(logical_addr, my_dev->tparams.zns_zone_capacity); + zns_info *info = ((zns_info *)my_dev->_private); + metadata_map **map = info->map; //Fill in hashmap if (map[index] == NULL) { map[index] = (metadata_map *)calloc(1, sizeof(metadata_map)); map[index]->logical_addr = logical_addr; map[index]->physical_addr = physical_addr; - // update_cache(map, map[index], buf, size, 1); + // update_cache(info, map[index], buf, size, 1); return; } if (map[index]->logical_addr == logical_addr) { map[index]->physical_addr = physical_addr; // free(map[index]->data); // used_buf_size -= map[index]->size; - // update_cache(map, map[index], buf, size, 1); + // update_cache(info, map[index], buf, size, 1); return; } metadata_map *head = map[index]; @@ -153,7 +161,7 @@ static void update_map(metadata_map *map[METADATA_MAP_LEN], head->next->physical_addr = physical_addr; // free(head->next->data); // used_buf_size -= head->next->size; - // update_cache(map, head->next, buf, size, 1); + // update_cache(info, head->next, buf, size, 1); return; } head = head->next; @@ -161,7 +169,7 @@ static void update_map(metadata_map *map[METADATA_MAP_LEN], head->next = (metadata_map *)calloc(1, sizeof(metadata_map)); head->next->logical_addr = logical_addr; head->next->physical_addr = physical_addr; - // update_cache(map, head->next, buf, size, 1); + // update_cache(info, head->next, buf, size, 1); } static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, @@ -197,9 +205,9 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * (*my_dev)->_private = calloc(1, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; // set num_log_zones - // info->num_log_zones = params->log_zones; + info->num_log_zones = params->log_zones; // set gc_trigger - // info->gc_trigger = params->gc_wmark; + info->gc_trigger = params->gc_wmark; // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -241,6 +249,11 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); + // set num_data_zones = zns_num_zones - num_log_zones + info->num_data_zones = (*my_dev)->tparams.zns_num_zones - info->num_log_zones; + // set map's size = num_data_zones + info->map = (metadata_map **)calloc(info->num_data_zones, sizeof(metadata_map *)); + // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); @@ -249,8 +262,7 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * (*my_dev)->tparams.zns_lba_size; // set capacity_bytes = #zone * zone_capacity - (*my_dev)->capacity_bytes = (*my_dev)->tparams.zns_num_zones * - (*my_dev)->tparams.zns_zone_capacity; + (*my_dev)->capacity_bytes = info->num_data_zones * (*my_dev)->tparams.zns_zone_capacity; // init upper_logical_addr_bound return 0; } @@ -259,12 +271,11 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { unsigned long long physical_addr = 0; - zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous //Get physical addr mapped for the provided logical addr // bool get = false; - // int ret = lookup_map(info->map, address, &physical_addr, buffer, size, &get); - int ret = lookup_map(info->map, address, &physical_addr); + // int ret = lookup_map(my_dev, address, &physical_addr, buffer, size, &get); + int ret = lookup_map(my_dev, address, &physical_addr); if (ret) return ret; // if (!get) @@ -281,16 +292,17 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; check_to_trigger_GC(info, physical_addr); - // update_map(info->map, address, physical_addr, buffer, size); - update_map(info->map, address, physical_addr); + // update_map(my_dev, address, physical_addr, buffer, size); + update_map(my_dev, address, physical_addr); return 0; } int deinit_ss_zns_device(struct user_zns_device *my_dev) { - metadata_map **map = ((zns_info *)my_dev->_private)->map; + zns_info *info = (zns_info *)my_dev->_private; + metadata_map **map = info->map; //free hashmap - for (int i = 0; i < METADATA_MAP_LEN; ++i) { + for (int i = 0; i < info->num_data_zones; ++i) { while (map[i]) { // if (map[i]->data) // free(map[i]->data); @@ -299,16 +311,9 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) free(tmp); } } + free(map); free(my_dev->_private); free(my_dev); return 0; } - -//FIXME: Update log zone if current zone cant support current write req -/* -static int check_update_curr_log_zone_validity(zns_info *info, uint32_t size) { - if info -} -*/ - } From b85cc1f5ad887247d4dfb467cddc3e98ec448079 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 24 Sep 2022 18:52:09 +0000 Subject: [PATCH 020/101] add log_zone_info structure --- src/m23-ftl/zns_device.cpp | 104 ++++++++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 12 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 1d968d2..fc73cec 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -44,12 +44,19 @@ struct metadata_map { metadata_map *next; }; +struct log_zone_info { + unsigned long long num_valid_pages; // counter + // metadata_map **metadata; // which map data in this log zone + uint32_t *log_zone_index; + unsigned long long write_index; // like write pointer +}; + struct zns_info { // Fixed values int fd; unsigned nsid; int num_log_zones; - int num_data_zones; + uint32_t num_data_zones; int gc_trigger; unsigned long long zone_num_pages; // uint64_t upper_logical_addr_bound; @@ -58,6 +65,11 @@ struct zns_info { uint32_t no_of_used_log_zones; // Keep track of used log zones unsigned long long curr_log_zone_saddr; // Point to current log zone starting address metadata_map **map; // Hashmap to store log + log_zone_info *log_zones_info; + uint32_t *used_log_zones_list; // let the new log zone at the end of the array + uint32_t curr_used_log_zone_index; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity + uint32_t *free_zones_list; // use free() and calloc() to change size dynamically + }; @@ -66,7 +78,7 @@ static inline int hash_function(uint64_t key, uint32_t zns_zone_capacity) return key / zns_zone_capacity; } -static void check_to_trigger_GC(zns_info *info, unsigned long long last_append_addr) +static void trigger_GC(zns_info *info, unsigned long long last_append_addr) { //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition //Check if current log zone is ended, then change to next log zone @@ -131,25 +143,64 @@ static int lookup_map(user_zns_device *my_dev, return 1; } +static void update_curr_used_log_zone(zns_info *info, uint32_t num_lba) +{ + log_zone_info *log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; + if (log_zone->write_index + num_lba >= info->zone_num_pages) { + uint32_t curr_zone_num_lba = info->zone_num_pages - log_zone->write_index; + log_zone->write_index += curr_zone_num_lba; + log_zone->num_valid_pages += curr_zone_num_lba; + ++info->curr_used_log_zone_index; + if (info->curr_used_log_zone_index == (uint32_t)info->num_log_zones) { + --info->curr_used_log_zone_index; + // move current log_zone info to freed zone info place + log_zone->write_index = 0; + log_zone->num_valid_pages = 0; + // memset(log_zone->metadata, (int)NULL, info->zone_num_pages); + memset(log_zone->log_zone_index, -1, info->num_log_zones); + } + log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; + log_zone->write_index += num_lba - curr_zone_num_lba; + log_zone->num_valid_pages += num_lba - curr_zone_num_lba; + } else { + log_zone->write_index += num_lba; + log_zone->num_valid_pages += num_lba; + } +} + // static void update_map(user_zns_device *my_dev, // uint64_t logical_addr, unsigned long long physical_addr, // void *buf, uint32_t size) -static void update_map(user_zns_device *my_dev, - uint64_t logical_addr, unsigned long long physical_addr) +static void update_map_and_log_info(user_zns_device *my_dev, + uint64_t logical_addr, unsigned long long physical_addr, + uint32_t num_lba) { int index = hash_function(logical_addr, my_dev->tparams.zns_zone_capacity); zns_info *info = ((zns_info *)my_dev->_private); metadata_map **map = info->map; + log_zone_info *log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; //Fill in hashmap if (map[index] == NULL) { map[index] = (metadata_map *)calloc(1, sizeof(metadata_map)); map[index]->logical_addr = logical_addr; map[index]->physical_addr = physical_addr; + // log_zone->metadata[log_zone->write_index] = map[index]; + log_zone->log_zone_index[log_zone->write_index] = index; + update_curr_used_log_zone(info, num_lba); // update_cache(info, map[index], buf, size, 1); return; } if (map[index]->logical_addr == logical_addr) { + // for (unsigned long long i = 0; i < log_zone->write_index; ++i) { + // if (log_zone->metadata[i] == map[index]) { + // log_zone->metadata[i] = NULL; + // break; + // } + // } map[index]->physical_addr = physical_addr; + // log_zone->metadata[log_zone->write_index] = map[index]; + log_zone->log_zone_index[log_zone->write_index] = index; + update_curr_used_log_zone(info, num_lba); // free(map[index]->data); // used_buf_size -= map[index]->size; // update_cache(info, map[index], buf, size, 1); @@ -158,7 +209,16 @@ static void update_map(user_zns_device *my_dev, metadata_map *head = map[index]; while (head->next) { if (head->next->logical_addr == logical_addr) { + // for (unsigned long long i = 0; i < log_zone->write_index; ++i) { + // if (log_zone->metadata[i] == head->next) { + // log_zone->metadata[i] = NULL; + // break; + // } + // } head->next->physical_addr = physical_addr; + // log_zone->metadata[log_zone->write_index] = head->next; + log_zone->log_zone_index[log_zone->write_index] = index; + update_curr_used_log_zone(info, num_lba); // free(head->next->data); // used_buf_size -= head->next->size; // update_cache(info, head->next, buf, size, 1); @@ -169,14 +229,15 @@ static void update_map(user_zns_device *my_dev, head->next = (metadata_map *)calloc(1, sizeof(metadata_map)); head->next->logical_addr = logical_addr; head->next->physical_addr = physical_addr; + // log_zone->metadata[log_zone->write_index] = head->next; + log_zone->log_zone_index[log_zone->write_index] = index; + update_curr_used_log_zone(info, num_lba); // update_cache(info, head->next, buf, size, 1); } static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, void *buffer, uint32_t size) { - // void *metadata = NULL; - // unsigned metadata_len = 0; unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; zns_info *info = (zns_info *)my_dev->_private; nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, @@ -188,8 +249,6 @@ static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_a static int append_to_log_zone(user_zns_device *my_dev, unsigned long long *physical_addr, void *buffer, uint32_t size) { - // void *metadata = NULL; - // unsigned metadata_len = 0; unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; //calc from size and page_size //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. zns_info *info = (zns_info *)my_dev->_private; @@ -253,11 +312,24 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->num_data_zones = (*my_dev)->tparams.zns_num_zones - info->num_log_zones; // set map's size = num_data_zones info->map = (metadata_map **)calloc(info->num_data_zones, sizeof(metadata_map *)); - + // set used_log_zones_list + info->used_log_zones_list = (uint32_t *)calloc(info->num_log_zones, sizeof(uint32_t)); + for (int i = 0; i < info->num_log_zones; ++i) + info->used_log_zones_list[i] = i; + // set free_zones_list + info->free_zones_list = (uint32_t *)calloc(info->num_data_zones, sizeof(uint32_t)); + for (uint32_t i = info->num_log_zones; i < (*my_dev)->tparams.zns_num_zones; ++i) + info->free_zones_list[i - info->num_log_zones] = i; // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set log_zones_info + info->log_zones_info = (log_zone_info *)calloc(info->num_log_zones, sizeof(log_zone_info)); + // for (int i = 0; i < info->num_log_zones; ++i) + // info->log_zones_info[i].metadata = (metadata_map **)calloc(info->zone_num_pages, sizeof(metadata_map *)); + for (int i = 0; i < info->num_log_zones; ++i) + info->log_zones_info[i].log_zone_index = (uint32_t *)calloc(info->zone_num_pages, sizeof(uint32_t)); // set zns_zone_capacity = #page_per_zone * zone_size (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * (*my_dev)->tparams.zns_lba_size; @@ -291,9 +363,9 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int ret = append_to_log_zone(my_dev, &physical_addr, buffer, size); if (ret) return ret; - check_to_trigger_GC(info, physical_addr); // update_map(my_dev, address, physical_addr, buffer, size); - update_map(my_dev, address, physical_addr); + update_map_and_log_info(my_dev, address, physical_addr, size / my_dev->tparams.zns_lba_size); + trigger_GC(info, physical_addr); return 0; } @@ -302,7 +374,7 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) zns_info *info = (zns_info *)my_dev->_private; metadata_map **map = info->map; //free hashmap - for (int i = 0; i < info->num_data_zones; ++i) { + for (uint32_t i = 0; i < info->num_data_zones; ++i) { while (map[i]) { // if (map[i]->data) // free(map[i]->data); @@ -312,8 +384,16 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) } } free(map); + // for (int i = 0; i < info->num_log_zones; ++i) + // free(info->log_zones_info[i].metadata); + for (int i = 0; i < info->num_log_zones; ++i) + free(info->log_zones_info[i].log_zone_index); + free(info->log_zones_info); + free(info->used_log_zones_list); + free(info->free_zones_list); free(my_dev->_private); free(my_dev); return 0; } + } From 7e5d76c23d5d27888632e4c0c78fdd0a902d74ef Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 25 Sep 2022 14:17:56 +0000 Subject: [PATCH 021/101] [WIP] GC with deadlock --- src/m23-ftl/zns_device.cpp | 565 ++++++++++++++++++++++++++----------- 1 file changed, 403 insertions(+), 162 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 7e24928..1abfe04 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -25,6 +25,7 @@ SOFTWARE. #include #include #include +#include #include #include #include "zns_device.h" @@ -34,63 +35,259 @@ extern "C" { // enum {BUF_SIZE = 128 * 4096}; // static uint32_t used_buf_size = 0; -struct metadata_map { - //FIXME: Add No of blocks written as well. + +//Structure for zone in zns +struct zone_info { + pthread_mutex_t page_counter_lock; + uint32_t num_valid_pages; // counter + unsigned long long physical_zone_saddr; + zone_info *chain; //Chained in free_zones and used_log_zones_list + //TODO: LOCK +}; + +//Structure for pagemap in log +struct logpage_map { uint64_t logical_addr; unsigned long long physical_addr; - // void *data; - // uint32_t size; - // uint64_t count; - metadata_map *next; + zone_info *log_ptr; + logpage_map *next; //Logpage map for each logical block }; -struct log_zone_info { - unsigned long long num_valid_pages; // counter - // metadata_map **metadata; // which map data in this log zone - uint32_t *log_zone_index; - unsigned long long write_index; // like write pointer + +//Structure for logical block [contains page map and block map] +struct logical_block_map { + uint64_t logical_block_saddr; + logpage_map *log_head; //Log page mapping for this logical block + zone_info *block_ptr; //Point to zone_info + //TODO: LOCK the access + pthread_mutex_t logical_block_lock; }; struct zns_info { - // Fixed values - int fd; - unsigned nsid; - int num_log_zones; - uint32_t num_data_zones; + // Values from init parameters + uint32_t no_log_zones; int gc_trigger; - unsigned long long zone_num_pages; - uint32_t no_of_zones; - // uint64_t upper_logical_addr_bound; - + char device_name; + pthread_t gc_thread_id; + bool run_gc; + + // Query the nisd for following info + int fd; + unsigned nsid; + uint32_t zns_page_size; + uint32_t zns_pages_per_zone; + uint32_t zns_zones_count; + uint32_t data_zones_count; + pthread_mutex_t zones_list_lock; + // Log zone maintainance - uint32_t no_of_used_log_zones; // Keep track of used log zones - uint32_t no_of_log_zones; + uint32_t no_of_used_log_zones; + zone_info *used_log_zones_list; // let the new log zone at the end of the array + zone_info *curr_log_zone; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity - unsigned long long curr_log_zone_saddr; // Point to current log zone starting address - metadata_map **map; // Hashmap to store log - log_zone_info *log_zones_info; - uint32_t *used_log_zones_list; // let the new log zone at the end of the array - uint32_t curr_used_log_zone_index; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity - uint32_t *free_zones_list; // use free() and calloc() to change size dynamically + + //Logical to Physical mapping page and block + logical_block_map **map; // Page mapped hashmap for log zone + + //Free zones array + zone_info *free_zones_list; }; -static inline int hash_function(uint64_t key, uint32_t zns_zone_capacity) +static int read_from_nvme(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size) +{ + unsigned short number_of_pages = size / info->zns_page_size - 1; + nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, + 0, 0, 0, 0, 0, size, buffer, 0, NULL); + //ss_nvme_show_status(errno); + return errno; +} + +static int append_to_zone(zns_info *info, unsigned long long saddr, unsigned long long *physical_addr, + void *buffer, uint32_t size) +{ + unsigned short number_of_pages = size / info->zns_page_size - 1; //calc from size and page_size + //TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. + nvme_zns_append(info->fd, info->nsid, saddr, number_of_pages, + 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); + //ss_nvme_show_status(errno); + return errno; +} + + +static inline int hash_function(uint64_t key, uint32_t base) { - return key / zns_zone_capacity; + return key / base; } -static void trigger_GC(zns_info *info, unsigned long long last_append_addr) +static inline int offset_function(uint64_t key, uint32_t base) +{ + return key % base; +} + + +void increment_zone_valid_page_counter(zone_info *log) { + pthread_mutex_lock(&log->page_counter_lock); + log->num_valid_pages++; + pthread_mutex_unlock(&log->page_counter_lock); +} + +void decrement_zone_valid_page_counter(zone_info *log) { + pthread_mutex_lock(&log->page_counter_lock); + log->num_valid_pages--; + pthread_mutex_unlock(&log->page_counter_lock); +} + +//Change this func +static void check_to_change_log_zone(zns_info *info, unsigned long long last_append_addr) { //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition - //Check if current log zone is ended, then change to next log zone - if (last_append_addr - info->curr_log_zone_saddr == info->zone_num_pages - 1) { - ++info->no_of_used_log_zones; - info->curr_log_zone_saddr = last_append_addr + 1; + //Check if current log zone is ended, then change to next free log zone; FIXME + if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zns_pages_per_zone - 1) + return; + printf("Here waiting\n"); + pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; + if(!info->used_log_zones_list) + info->used_log_zones_list = info->curr_log_zone; + else { + zone_info *head = info->used_log_zones_list; + while(head->chain) + head = head->chain; + head->chain = info->curr_log_zone; + } + info->no_of_used_log_zones++; + pthread_mutex_unlock(&info->zones_list_lock); + printf("Here done\n"); + + //FIXME: Change the busy wait + printf("Waiting to free\n"); + while(info->no_of_used_log_zones == info->no_log_zones) { + continue; + } + printf("Freed\n"); + + + //Dequeue from free_zone to curr_log_zone; + pthread_mutex_lock(&info->zones_list_lock); + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + info->curr_log_zone->chain = NULL; + pthread_mutex_unlock(&info->zones_list_lock); + +} + + +void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { + for(uint32_t offset = 0; offset < info->zns_pages_per_zone; offset++) { + logpage_map *ptr = map->log_head; + bool flag = false; + uint64_t paddr; + while(ptr) { + if(ptr->logical_addr == map->logical_block_saddr + offset) { + paddr = ptr->physical_addr; + decrement_zone_valid_page_counter(ptr->log_ptr); + flag = true; + break; + } + ptr = ptr->next; + } + + //Get block + if((!flag)&&(map->block_ptr)){ + flag = true; + paddr = map->block_ptr->physical_zone_saddr + offset; + } + + void *buffer; + buffer = (void *)calloc(1, info->zns_page_size); + //Do nvme read on paddr + if(flag) + read_from_nvme(info, paddr, buffer, info->zns_page_size); + //Do nvme append new_zone->saddr + append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); + free(buffer); + increment_zone_valid_page_counter(new_zone); + } + +} + +void *gc_thread(void *info_ptr) { + zns_info *info = (zns_info*) info_ptr; + uint32_t index = 0; + while(info->run_gc) { + //Check condition + while(info->no_of_used_log_zones < info->gc_trigger) + continue; + + logical_block_map *ptr = info->map[index]; + if ((ptr == NULL)&&(ptr->log_head == NULL)) { + index = (index + 1) % info->data_zones_count; + continue; + } + + zone_info *free_zone, *old_zone; + pthread_mutex_lock(&info->zones_list_lock); + //Get free zone + free_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + free_zone->chain = NULL; + pthread_mutex_unlock(&info->zones_list_lock); + + pthread_mutex_lock(&ptr->logical_block_lock); + merge(info, ptr, free_zone); + old_zone = ptr->block_ptr; + ptr->block_ptr = free_zone; + pthread_mutex_unlock(&ptr->logical_block_lock); + + printf("Exec12\n"); + if(old_zone) + old_zone->num_valid_pages = 0; + pthread_mutex_lock(&info->zones_list_lock); + //Check used log zone valid counter if zero reset and add to free zone list + + //Append old data zone to free zones list + zone_info *head = info->free_zones_list; + if(old_zone) { + if(head) { + while(head->chain) + head = head->chain; + head->chain = old_zone; + } else { + head = old_zone; + } + } + //Reset if used log zone reference is zero + head = info->used_log_zones_list; + while(head) { + if(head->num_valid_pages == 0) { + //reset zone + nvme_zns_mgmt_send(info->fd, info->nsid, head->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + //append to free zones list + zone_info *temp = info->free_zones_list; + if(temp) { + while(temp->chain) + temp = temp->chain; + temp->chain = head; + } else { + temp = head; + } + info->no_of_used_log_zones--; + } + head = head->chain; + } + + pthread_mutex_unlock(&info->zones_list_lock); + + index = (index + 1) % info->data_zones_count; + printf("Exec\n"); } + return NULL; } + // static void update_cache(zns_info *info, metadata_map *metadata, // void *buf, uint32_t size, uint32_t count_threshold) // { @@ -123,12 +320,17 @@ static void trigger_GC(zns_info *info, unsigned long long last_append_addr) // static int lookup_map(user_zns_device *my_dev, // uint64_t logical_addr, unsigned long long *physical_addr, // void *buf, uint32_t size, bool *get) -static int lookup_map(user_zns_device *my_dev, +static int lookup_map(zns_info *info, uint64_t logical_addr, unsigned long long *physical_addr) { - int index = hash_function(logical_addr, my_dev->tparams.zns_zone_capacity); - zns_info *info = ((zns_info *)my_dev->_private); - metadata_map *head = info->map[index]; + int index = hash_function(logical_addr, info->zns_pages_per_zone); + + + //Lock the logical block + pthread_mutex_lock(&info->map[index]->logical_block_lock); + + //Search in log + logpage_map *head = info->map[index]->log_head; while (head) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; @@ -139,13 +341,24 @@ static int lookup_map(user_zns_device *my_dev, // } else { // update_cache(info, head, buf, size, head->count); // } + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return 0; } head = head->next; } - return 1; + + //If not present provide data block addr + uint32_t offset = offset_function(logical_addr, info->zns_pages_per_zone); + *physical_addr = info->map[index]->block_ptr->physical_zone_saddr + offset; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + + return 0; } + + +//FIXME: Check the func functianality +/* static void update_curr_used_log_zone(zns_info *info, uint32_t num_lba) { log_zone_info *log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; @@ -170,92 +383,78 @@ static void update_curr_used_log_zone(zns_info *info, uint32_t num_lba) log_zone->num_valid_pages += num_lba; } } +*/ + + + // static void update_map(user_zns_device *my_dev, // uint64_t logical_addr, unsigned long long physical_addr, // void *buf, uint32_t size) -static void update_map_and_log_info(user_zns_device *my_dev, - uint64_t logical_addr, unsigned long long physical_addr, - uint32_t num_lba) +static void update_map(zns_info *info, + uint64_t logical_addr, unsigned long long physical_addr) { - int index = hash_function(logical_addr, my_dev->tparams.zns_zone_capacity); - zns_info *info = ((zns_info *)my_dev->_private); - metadata_map **map = info->map; - log_zone_info *log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; + int index = hash_function(logical_addr, info->zns_pages_per_zone); + logical_block_map **map = info->map; //Fill in hashmap - if (map[index] == NULL) { - map[index] = (metadata_map *)calloc(1, sizeof(metadata_map)); - map[index]->logical_addr = logical_addr; - map[index]->physical_addr = physical_addr; - // log_zone->metadata[log_zone->write_index] = map[index]; - log_zone->log_zone_index[log_zone->write_index] = index; - update_curr_used_log_zone(info, num_lba); - // update_cache(info, map[index], buf, size, 1); + + //Lock for the update in log + pthread_mutex_lock(&info->map[index]->logical_block_lock); + if (map[index]->log_head == NULL) { + map[index]->log_head = (logpage_map *)calloc(1, sizeof(logpage_map)); + increment_zone_valid_page_counter(info->curr_log_zone); + map[index]->log_head->log_ptr = info->curr_log_zone; + map[index]->log_head->logical_addr = logical_addr; + map[index]->log_head->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } - if (map[index]->logical_addr == logical_addr) { - // for (unsigned long long i = 0; i < log_zone->write_index; ++i) { - // if (log_zone->metadata[i] == map[index]) { - // log_zone->metadata[i] = NULL; - // break; - // } - // } - map[index]->physical_addr = physical_addr; - // log_zone->metadata[log_zone->write_index] = map[index]; - log_zone->log_zone_index[log_zone->write_index] = index; - update_curr_used_log_zone(info, num_lba); - // free(map[index]->data); - // used_buf_size -= map[index]->size; - // update_cache(info, map[index], buf, size, 1); - return; + + if (map[index]->log_head->logical_addr == logical_addr) { + //Update log counter + decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); + increment_zone_valid_page_counter(info->curr_log_zone); + map[index]->log_head->log_ptr = info->curr_log_zone; + map[index]->log_head->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + return; } - metadata_map *head = map[index]; - while (head->next) { - if (head->next->logical_addr == logical_addr) { - // for (unsigned long long i = 0; i < log_zone->write_index; ++i) { - // if (log_zone->metadata[i] == head->next) { - // log_zone->metadata[i] = NULL; - // break; - // } - // } - head->next->physical_addr = physical_addr; - // log_zone->metadata[log_zone->write_index] = head->next; - log_zone->log_zone_index[log_zone->write_index] = index; - update_curr_used_log_zone(info, num_lba); - // free(head->next->data); - // used_buf_size -= head->next->size; - // update_cache(info, head->next, buf, size, 1); - return; + + logpage_map *ptr = map[index]->log_head; + while (ptr->next) { + if (ptr->next->logical_addr == logical_addr) { + //Update log counter + decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); + increment_zone_valid_page_counter(info->curr_log_zone); + ptr->next->log_ptr = info->curr_log_zone; + ptr->next->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + return; } - head = head->next; + ptr = ptr->next; } - head->next = (metadata_map *)calloc(1, sizeof(metadata_map)); - head->next->logical_addr = logical_addr; - head->next->physical_addr = physical_addr; - // log_zone->metadata[log_zone->write_index] = head->next; + ptr->next = (logpage_map *)calloc(1, sizeof(logpage_map)); + increment_zone_valid_page_counter(info->curr_log_zone); + ptr->next->log_ptr = info->curr_log_zone; + ptr->next->logical_addr = logical_addr; + ptr->next->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + return; + /* log_zone->log_zone_index[log_zone->write_index] = index; update_curr_used_log_zone(info, num_lba); + // free(head->next->data); + // used_buf_size -= head->next->size; // update_cache(info, head->next, buf, size, 1); + */ } -static int read_from_nvme(user_zns_device *my_dev, unsigned long long physical_addr, - void *buffer, uint32_t size) -{ - unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; - zns_info *info = (zns_info *)my_dev->_private; - nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, - 0, 0, 0, 0, 0, size, buffer, 0, NULL); - //ss_nvme_show_status(errno); - return errno; -} - -static int append_to_log_zone(user_zns_device *my_dev, unsigned long long *physical_addr, +static int append_to_log_zone(zns_info *info, unsigned long long *physical_addr, void *buffer, uint32_t size) { - unsigned short number_of_pages = size / my_dev->tparams.zns_lba_size - 1; //calc from size and page_size - //FIXME: Later make provision to include meta data containing lba and write size. For persistent log storage. - zns_info *info = (zns_info *)my_dev->_private; - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone_saddr, number_of_pages, + unsigned short number_of_pages = size / info->zns_page_size - 1; //calc from size and page_size + //TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, number_of_pages, 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); //ss_nvme_show_status(errno); return errno; @@ -263,19 +462,24 @@ static int append_to_log_zone(user_zns_device *my_dev, unsigned long long *physi int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { + //Assign the private ptr to zns_info *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; + // set num_log_zones - info->num_log_zones = params->log_zones; + info->no_log_zones = params->log_zones; // set gc_trigger info->gc_trigger = params->gc_wmark; + + // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { printf("Dev %s opened failed %d\n", params->name, info->fd); return errno; } + // set nsid int ret = nvme_get_nsid(info->fd, &info->nsid); if (ret) { @@ -291,7 +495,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return ret; } } - // set zns_lba_size + + // set zns_lba_size(or)zns_page_size : Its same for now! nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); if (ret) { @@ -299,8 +504,9 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return ret; } (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; - // set lba_size_bytes (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; + info->zns_page_size = (*my_dev)->tparams.zns_lba_size; + // set zns_num_zones nvme_zone_report zns_report; ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, @@ -311,38 +517,53 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->no_of_zones = (*my_dev)->tparams.zns_num_zones; + info->zns_zones_count = (*my_dev)->tparams.zns_num_zones; + // set num_data_zones = zns_num_zones - num_log_zones - info->num_data_zones = (*my_dev)->tparams.zns_num_zones - info->num_log_zones; - // set map's size = num_data_zones - info->map = (metadata_map **)calloc(info->num_data_zones, sizeof(metadata_map *)); - // set used_log_zones_list - info->used_log_zones_list = (uint32_t *)calloc(info->num_log_zones, sizeof(uint32_t)); - for (int i = 0; i < info->num_log_zones; ++i) - info->used_log_zones_list[i] = i; - // set free_zones_list - info->free_zones_list = (uint32_t *)calloc(info->num_data_zones, sizeof(uint32_t)); - for (uint32_t i = info->num_log_zones; i < (*my_dev)->tparams.zns_num_zones; ++i) - info->free_zones_list[i - info->num_log_zones] = i; + info->data_zones_count = info->zns_zones_count - info->no_log_zones; + // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); - info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; - // set log_zones_info - info->log_zones_info = (log_zone_info *)calloc(info->num_log_zones, sizeof(log_zone_info)); - // for (int i = 0; i < info->num_log_zones; ++i) - // info->log_zones_info[i].metadata = (metadata_map **)calloc(info->zone_num_pages, sizeof(metadata_map *)); - for (int i = 0; i < info->num_log_zones; ++i) - info->log_zones_info[i].log_zone_index = (uint32_t *)calloc(info->zone_num_pages, sizeof(uint32_t)); + info->zns_pages_per_zone = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + (*my_dev)->tparams.zns_zone_capacity = info->zns_pages_per_zone * (*my_dev)->tparams.zns_lba_size; - // set capacity_bytes = #zone * zone_capacity - (*my_dev)->capacity_bytes = ((*my_dev)->tparams.zns_num_zones - params->log_zones) * - (*my_dev)->tparams.zns_zone_capacity; - info->no_of_log_zones = params->log_zones; - (*my_dev)->capacity_bytes = info->num_data_zones * (*my_dev)->tparams.zns_zone_capacity; - // init upper_logical_addr_bound + + // set user capacity bytes = #data_zones * zone_capacity + (*my_dev)->capacity_bytes = info->data_zones_count * (*my_dev)->tparams.zns_zone_capacity; + + // set log zone page mapped hashmap size to num_data_zones + info->map = (logical_block_map **)calloc(info->data_zones_count, sizeof(logical_block_map *)); + + // set all zone index to free_zones_list + zone_info *head = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); + head->physical_zone_saddr = 0; + head->num_valid_pages = 0; + zone_info *tmp = head; + for (uint32_t i = 1; i < info->zns_zones_count; i++) { + tmp->chain = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); + tmp->chain->physical_zone_saddr = i*info->zns_pages_per_zone; + tmp->chain->num_valid_pages = 0; + tmp = tmp->chain; + } + info->free_zones_list = head; + + //Set current log zone to 0th zone + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + info->curr_log_zone->chain = NULL; + + for (uint32_t i = 0; i < info->data_zones_count; i++) { + info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); + info->map[i]->block_ptr = NULL; + info->map[i]->log_head = NULL; + } + + //Start GC + info->run_gc = true; + pthread_create(&info->gc_thread_id, NULL, &gc_thread, (void *)info); return 0; } @@ -350,15 +571,14 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { unsigned long long physical_addr = 0; + zns_info *info = (zns_info *)my_dev->_private; + //FIXME: Proision for contiguos block read, but not written contiguous - //Get physical addr mapped for the provided logical addr - // bool get = false; - // int ret = lookup_map(my_dev, address, &physical_addr, buffer, size, &get); - int ret = lookup_map(my_dev, address, &physical_addr); + int ret = lookup_map(info, address, &physical_addr); if (ret) return ret; // if (!get) - read_from_nvme(my_dev, physical_addr, buffer, size); + read_from_nvme(info, physical_addr, buffer, size); return errno; } @@ -367,37 +587,58 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, { unsigned long long physical_addr = 0; zns_info *info = (zns_info *)my_dev->_private; - int ret = append_to_log_zone(my_dev, &physical_addr, buffer, size); + int ret = append_to_log_zone(info, &physical_addr, buffer, size); if (ret) - return ret; - // update_map(my_dev, address, physical_addr, buffer, size); - update_map_and_log_info(my_dev, address, physical_addr, size / my_dev->tparams.zns_lba_size); - trigger_GC(info, physical_addr); + return ret; + update_map(info, address, physical_addr); + check_to_change_log_zone(info, physical_addr); return 0; } int deinit_ss_zns_device(struct user_zns_device *my_dev) { + zns_info *info = (zns_info *)my_dev->_private; - metadata_map **map = info->map; + + //Kill gc + info->run_gc = false; + pthread_join(info->gc_thread_id, NULL); + + logical_block_map **map = info->map; //free hashmap - for (uint32_t i = 0; i < info->num_data_zones; ++i) { - while (map[i]) { - // if (map[i]->data) - // free(map[i]->data); - metadata_map *tmp = map[i]; - map[i] = map[i]->next; - free(tmp); + for (uint32_t i = 0; i < info->data_zones_count; i++) { + if (map[i]==NULL) + continue; + + //Clear all log heads for a logical block + logpage_map *head = map[i]->log_head; + while (head) { + logpage_map *tmp = head->next; + free(head); + head = tmp; } + + free(map[i]->block_ptr); + + //Clear map[i] + free(map[i]); } free(map); - // for (int i = 0; i < info->num_log_zones; ++i) - // free(info->log_zones_info[i].metadata); - for (int i = 0; i < info->num_log_zones; ++i) - free(info->log_zones_info[i].log_zone_index); - free(info->log_zones_info); - free(info->used_log_zones_list); - free(info->free_zones_list); + + zone_info *head = info->used_log_zones_list; + while(head) { + zone_info *tmp = head->chain; + free(head); + head = tmp; + } + + head = info->free_zones_list; + while(head) { + zone_info *tmp = head->chain; + free(head); + head = tmp; + } + free(info->curr_log_zone); free(my_dev->_private); free(my_dev); return 0; From f3301c1061bf11a85c5ea0c0202af53ce9564775 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 25 Sep 2022 15:55:53 +0000 Subject: [PATCH 022/101] [WIP] Used log zone list removal bug --- src/m23-ftl/zns_device.cpp | 40 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 1abfe04..e5bb613 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -147,7 +147,6 @@ static void check_to_change_log_zone(zns_info *info, unsigned long long last_app //Check if current log zone is ended, then change to next free log zone; FIXME if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zns_pages_per_zone - 1) return; - printf("Here waiting\n"); pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; if(!info->used_log_zones_list) info->used_log_zones_list = info->curr_log_zone; @@ -159,14 +158,11 @@ static void check_to_change_log_zone(zns_info *info, unsigned long long last_app } info->no_of_used_log_zones++; pthread_mutex_unlock(&info->zones_list_lock); - printf("Here done\n"); //FIXME: Change the busy wait - printf("Waiting to free\n"); while(info->no_of_used_log_zones == info->no_log_zones) { continue; } - printf("Freed\n"); //Dequeue from free_zone to curr_log_zone; @@ -241,7 +237,6 @@ void *gc_thread(void *info_ptr) { ptr->block_ptr = free_zone; pthread_mutex_unlock(&ptr->logical_block_lock); - printf("Exec12\n"); if(old_zone) old_zone->num_valid_pages = 0; pthread_mutex_lock(&info->zones_list_lock); @@ -258,31 +253,25 @@ void *gc_thread(void *info_ptr) { head = old_zone; } } - //Reset if used log zone reference is zero - head = info->used_log_zones_list; - while(head) { - if(head->num_valid_pages == 0) { + + //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list + //Reset if used log zone : if valid pages is reference is zero + zone_info *copy = info->used_log_zones_list, *p1 = info->used_log_zones_list; + bool flag = false; + while(p1) { + if(p1->num_valid_pages == 0) { //reset zone nvme_zns_mgmt_send(info->fd, info->nsid, head->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); - //append to free zones list - zone_info *temp = info->free_zones_list; - if(temp) { - while(temp->chain) - temp = temp->chain; - temp->chain = head; - } else { - temp = head; - } - info->no_of_used_log_zones--; - } - head = head->chain; + + //Remove from used_log_zones + + //Append to free zones + } } - pthread_mutex_unlock(&info->zones_list_lock); index = (index + 1) % info->data_zones_count; - printf("Exec\n"); } return NULL; } @@ -589,9 +578,12 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; int ret = append_to_log_zone(info, &physical_addr, buffer, size); if (ret) - return ret; + return ret; + //printf("Wait1\n"); update_map(info, address, physical_addr); + //printf("Wait2\n %d",info->no_of_used_log_zones); check_to_change_log_zone(info, physical_addr); + //printf("Wait3\n"); return 0; } From 7eda604d4210e5048415f1a499079536575d32f3 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 25 Sep 2022 17:33:21 +0000 Subject: [PATCH 023/101] revise indentation --- src/m23-ftl/zns_device.cpp | 284 ++++++++++++++++++------------------- 1 file changed, 139 insertions(+), 145 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index e5bb613..24f83c1 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -25,8 +25,8 @@ SOFTWARE. #include #include #include -#include #include +#include #include #include "zns_device.h" @@ -35,7 +35,6 @@ extern "C" { // enum {BUF_SIZE = 128 * 4096}; // static uint32_t used_buf_size = 0; - //Structure for zone in zns struct zone_info { pthread_mutex_t page_counter_lock; @@ -84,12 +83,10 @@ struct zns_info { uint32_t no_of_used_log_zones; zone_info *used_log_zones_list; // let the new log zone at the end of the array zone_info *curr_log_zone; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity - - + //Logical to Physical mapping page and block logical_block_map **map; // Page mapped hashmap for log zone - //Free zones array zone_info *free_zones_list; }; @@ -127,16 +124,17 @@ static inline int offset_function(uint64_t key, uint32_t base) return key % base; } - -void increment_zone_valid_page_counter(zone_info *log) { +void increment_zone_valid_page_counter(zone_info *log) +{ pthread_mutex_lock(&log->page_counter_lock); - log->num_valid_pages++; + ++log->num_valid_pages; pthread_mutex_unlock(&log->page_counter_lock); } -void decrement_zone_valid_page_counter(zone_info *log) { +void decrement_zone_valid_page_counter(zone_info *log) +{ pthread_mutex_lock(&log->page_counter_lock); - log->num_valid_pages--; + --log->num_valid_pages; pthread_mutex_unlock(&log->page_counter_lock); } @@ -148,130 +146,126 @@ static void check_to_change_log_zone(zns_info *info, unsigned long long last_app if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zns_pages_per_zone - 1) return; pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; - if(!info->used_log_zones_list) - info->used_log_zones_list = info->curr_log_zone; - else { - zone_info *head = info->used_log_zones_list; - while(head->chain) - head = head->chain; - head->chain = info->curr_log_zone; - } - info->no_of_used_log_zones++; + if (!info->used_log_zones_list) { + info->used_log_zones_list = info->curr_log_zone; + } else { + zone_info *head = info->used_log_zones_list; + while(head->chain) + head = head->chain; + head->chain = info->curr_log_zone; + } + ++info->no_of_used_log_zones; pthread_mutex_unlock(&info->zones_list_lock); - //FIXME: Change the busy wait - while(info->no_of_used_log_zones == info->no_log_zones) { - continue; - } - - - //Dequeue from free_zone to curr_log_zone; - pthread_mutex_lock(&info->zones_list_lock); - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - info->curr_log_zone->chain = NULL; - pthread_mutex_unlock(&info->zones_list_lock); - -} + //FIXME: Change the busy wait + while (info->no_of_used_log_zones == info->no_log_zones) + continue; + //Dequeue from free_zone to curr_log_zone; + pthread_mutex_lock(&info->zones_list_lock); + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + info->curr_log_zone->chain = NULL; + pthread_mutex_unlock(&info->zones_list_lock); +} -void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { - for(uint32_t offset = 0; offset < info->zns_pages_per_zone; offset++) { +void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) +{ + for (uint32_t offset = 0; offset < info->zns_pages_per_zone; ++offset) { logpage_map *ptr = map->log_head; - bool flag = false; - uint64_t paddr; - while(ptr) { - if(ptr->logical_addr == map->logical_block_saddr + offset) { - paddr = ptr->physical_addr; - decrement_zone_valid_page_counter(ptr->log_ptr); - flag = true; - break; + bool flag = false; + uint64_t paddr; + while (ptr) { + if (ptr->logical_addr == map->logical_block_saddr + offset) { + paddr = ptr->physical_addr; + decrement_zone_valid_page_counter(ptr->log_ptr); + flag = true; + break; + } + ptr = ptr->next; } - ptr = ptr->next; - } - //Get block - if((!flag)&&(map->block_ptr)){ - flag = true; - paddr = map->block_ptr->physical_zone_saddr + offset; - } - - void *buffer; - buffer = (void *)calloc(1, info->zns_page_size); - //Do nvme read on paddr - if(flag) + //Get block + if (!flag && map->block_ptr) { + flag = true; + paddr = map->block_ptr->physical_zone_saddr + offset; + } + + void *buffer = (void *)calloc(1, info->zns_page_size); + //Do nvme read on paddr + if (flag) read_from_nvme(info, paddr, buffer, info->zns_page_size); - //Do nvme append new_zone->saddr + //Do nvme append new_zone->saddr append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); - free(buffer); - increment_zone_valid_page_counter(new_zone); - } - + free(buffer); + increment_zone_valid_page_counter(new_zone); + } } -void *gc_thread(void *info_ptr) { - zns_info *info = (zns_info*) info_ptr; +void *gc_thread(void *info_ptr) +{ + zns_info *info = (zns_info *)info_ptr; uint32_t index = 0; - while(info->run_gc) { + while (info->run_gc) { //Check condition - while(info->no_of_used_log_zones < info->gc_trigger) + while (info->no_of_used_log_zones < info->gc_trigger) continue; logical_block_map *ptr = info->map[index]; - if ((ptr == NULL)&&(ptr->log_head == NULL)) { - index = (index + 1) % info->data_zones_count; - continue; - } - - zone_info *free_zone, *old_zone; - pthread_mutex_lock(&info->zones_list_lock); - //Get free zone - free_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - free_zone->chain = NULL; - pthread_mutex_unlock(&info->zones_list_lock); - - pthread_mutex_lock(&ptr->logical_block_lock); - merge(info, ptr, free_zone); - old_zone = ptr->block_ptr; - ptr->block_ptr = free_zone; - pthread_mutex_unlock(&ptr->logical_block_lock); - - if(old_zone) - old_zone->num_valid_pages = 0; - pthread_mutex_lock(&info->zones_list_lock); - //Check used log zone valid counter if zero reset and add to free zone list - - //Append old data zone to free zones list - zone_info *head = info->free_zones_list; - if(old_zone) { - if(head) { - while(head->chain) + if (ptr == NULL && ptr->log_head == NULL) { + index = (index + 1) % info->data_zones_count; + continue; + } + + zone_info *free_zone, *old_zone; + pthread_mutex_lock(&info->zones_list_lock); + //Get free zone + free_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + free_zone->chain = NULL; + pthread_mutex_unlock(&info->zones_list_lock); + + pthread_mutex_lock(&ptr->logical_block_lock); + merge(info, ptr, free_zone); + old_zone = ptr->block_ptr; + ptr->block_ptr = free_zone; + pthread_mutex_unlock(&ptr->logical_block_lock); + + if(old_zone) + old_zone->num_valid_pages = 0; + pthread_mutex_lock(&info->zones_list_lock); + //Check used log zone valid counter if zero reset and add to free zone list + + //Append old data zone to free zones list + zone_info *head = info->free_zones_list; + if (old_zone) { + if (head) { + while(head->chain) head = head->chain; - head->chain = old_zone; - } else { - head = old_zone; - } + head->chain = old_zone; + } else { + head = old_zone; + } } - //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list - //Reset if used log zone : if valid pages is reference is zero - zone_info *copy = info->used_log_zones_list, *p1 = info->used_log_zones_list; - bool flag = false; - while(p1) { - if(p1->num_valid_pages == 0) { - //reset zone - nvme_zns_mgmt_send(info->fd, info->nsid, head->physical_zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0, NULL); - - //Remove from used_log_zones - - //Append to free zones - } - } - pthread_mutex_unlock(&info->zones_list_lock); - - index = (index + 1) % info->data_zones_count; + //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list + //Reset if used log zone : if valid pages is reference is zero + zone_info *copy = info->used_log_zones_list, *p1 = info->used_log_zones_list; + bool flag = false; + while (p1) { + if (p1->num_valid_pages == 0) { + //reset zone + nvme_zns_mgmt_send(info->fd, info->nsid, head->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + + //Remove from used_log_zones + + //Append to free zones + } + } + pthread_mutex_unlock(&info->zones_list_lock); + + index = (index + 1) % info->data_zones_count; } return NULL; } @@ -330,7 +324,7 @@ static int lookup_map(zns_info *info, // } else { // update_cache(info, head, buf, size, head->count); // } - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return 0; } head = head->next; @@ -400,25 +394,25 @@ static void update_map(zns_info *info, } if (map[index]->log_head->logical_addr == logical_addr) { - //Update log counter - decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); - increment_zone_valid_page_counter(info->curr_log_zone); - map[index]->log_head->log_ptr = info->curr_log_zone; + //Update log counter + decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); + increment_zone_valid_page_counter(info->curr_log_zone); + map[index]->log_head->log_ptr = info->curr_log_zone; map[index]->log_head->physical_addr = physical_addr; pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return; + return; } logpage_map *ptr = map[index]->log_head; while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); + decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); increment_zone_valid_page_counter(info->curr_log_zone); ptr->next->log_ptr = info->curr_log_zone; - ptr->next->physical_addr = physical_addr; + ptr->next->physical_addr = physical_addr; pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return; + return; } ptr = ptr->next; } @@ -531,11 +525,11 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * head->physical_zone_saddr = 0; head->num_valid_pages = 0; zone_info *tmp = head; - for (uint32_t i = 1; i < info->zns_zones_count; i++) { - tmp->chain = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); - tmp->chain->physical_zone_saddr = i*info->zns_pages_per_zone; - tmp->chain->num_valid_pages = 0; - tmp = tmp->chain; + for (uint32_t i = 1; i < info->zns_zones_count; ++i) { + tmp->chain = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); + tmp->chain->physical_zone_saddr = i * info->zns_pages_per_zone; + tmp->chain->num_valid_pages = 0; + tmp = tmp->chain; } info->free_zones_list = head; @@ -544,10 +538,10 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->free_zones_list = info->free_zones_list->chain; info->curr_log_zone->chain = NULL; - for (uint32_t i = 0; i < info->data_zones_count; i++) { + for (uint32_t i = 0; i < info->data_zones_count; ++i) { info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); info->map[i]->block_ptr = NULL; - info->map[i]->log_head = NULL; + info->map[i]->log_head = NULL; } //Start GC @@ -599,36 +593,36 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) logical_block_map **map = info->map; //free hashmap for (uint32_t i = 0; i < info->data_zones_count; i++) { - if (map[i]==NULL) - continue; + if (map[i] == NULL) + continue; - //Clear all log heads for a logical block + //Clear all log heads for a logical block logpage_map *head = map[i]->log_head; while (head) { logpage_map *tmp = head->next; free(head); - head = tmp; + head = tmp; } - free(map[i]->block_ptr); + free(map[i]->block_ptr); - //Clear map[i] - free(map[i]); + //Clear map[i] + free(map[i]); } free(map); zone_info *head = info->used_log_zones_list; - while(head) { - zone_info *tmp = head->chain; - free(head); - head = tmp; + while (head) { + zone_info *tmp = head->chain; + free(head); + head = tmp; } head = info->free_zones_list; while(head) { - zone_info *tmp = head->chain; - free(head); - head = tmp; + zone_info *tmp = head->chain; + free(head); + head = tmp; } free(info->curr_log_zone); free(my_dev->_private); From f9c954e0fa6e63761f727e10102ee11d8ecd58ac Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 25 Sep 2022 19:55:59 +0000 Subject: [PATCH 024/101] [WIP] write working code --- src/m23-ftl/zns_device.cpp | 193 ++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 111 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index e5bb613..014c022 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -42,7 +42,6 @@ struct zone_info { uint32_t num_valid_pages; // counter unsigned long long physical_zone_saddr; zone_info *chain; //Chained in free_zones and used_log_zones_list - //TODO: LOCK }; //Structure for pagemap in log @@ -128,6 +127,15 @@ static inline int offset_function(uint64_t key, uint32_t base) } +int count(zone_info *ptr) { + int count =0; + while(ptr) { + count++; + ptr=ptr->chain; + } + return count; +} + void increment_zone_valid_page_counter(zone_info *log) { pthread_mutex_lock(&log->page_counter_lock); log->num_valid_pages++; @@ -160,9 +168,11 @@ static void check_to_change_log_zone(zns_info *info, unsigned long long last_app pthread_mutex_unlock(&info->zones_list_lock); //FIXME: Change the busy wait - while(info->no_of_used_log_zones == info->no_log_zones) { + //printf("Wait here %d\n",info->no_of_used_log_zones); + while(info->no_of_used_log_zones == info->no_log_zones) { continue; } + //printf("Done here\n"); //Dequeue from free_zone to curr_log_zone; @@ -180,9 +190,11 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { logpage_map *ptr = map->log_head; bool flag = false; uint64_t paddr; + //printf("Logical block saddr : %ld\n",map->logical_block_saddr); while(ptr) { if(ptr->logical_addr == map->logical_block_saddr + offset) { paddr = ptr->physical_addr; + //printf("\tDecrement for %ld %d : ", ptr->log_ptr->physical_zone_saddr, ptr->log_ptr->num_valid_pages); decrement_zone_valid_page_counter(ptr->log_ptr); flag = true; break; @@ -197,16 +209,24 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { } void *buffer; - buffer = (void *)calloc(1, info->zns_page_size); + buffer = (void *)calloc(1,info->zns_page_size); //Do nvme read on paddr - if(flag) + if(flag) { read_from_nvme(info, paddr, buffer, info->zns_page_size); - //Do nvme append new_zone->saddr - append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); + //Do nvme append new_zone->saddr + append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); + } free(buffer); increment_zone_valid_page_counter(new_zone); } - + logpage_map *ptr = map->log_head; + while(ptr) { + logpage_map *tmp = ptr; + ptr = ptr->next; + free(tmp); + } + map->log_head = NULL; + map->block_ptr = new_zone; } void *gc_thread(void *info_ptr) { @@ -217,23 +237,27 @@ void *gc_thread(void *info_ptr) { while(info->no_of_used_log_zones < info->gc_trigger) continue; - logical_block_map *ptr = info->map[index]; - if ((ptr == NULL)&&(ptr->log_head == NULL)) { + logical_block_map *ptr = info->map[index]; + while(ptr->log_head == NULL) { index = (index + 1) % info->data_zones_count; - continue; + ptr = info->map[index]; + continue; } zone_info *free_zone, *old_zone; pthread_mutex_lock(&info->zones_list_lock); - //Get free zone + //Get free zone and nullify the chain free_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; free_zone->chain = NULL; pthread_mutex_unlock(&info->zones_list_lock); + pthread_mutex_lock(&ptr->logical_block_lock); - merge(info, ptr, free_zone); + //Merge the logical block to data zone old_zone = ptr->block_ptr; + merge(info, ptr, free_zone); + ptr->log_head = NULL; ptr->block_ptr = free_zone; pthread_mutex_unlock(&ptr->logical_block_lock); @@ -241,7 +265,6 @@ void *gc_thread(void *info_ptr) { old_zone->num_valid_pages = 0; pthread_mutex_lock(&info->zones_list_lock); //Check used log zone valid counter if zero reset and add to free zone list - //Append old data zone to free zones list zone_info *head = info->free_zones_list; if(old_zone) { @@ -256,59 +279,49 @@ void *gc_thread(void *info_ptr) { //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list //Reset if used log zone : if valid pages is reference is zero - zone_info *copy = info->used_log_zones_list, *p1 = info->used_log_zones_list; - bool flag = false; - while(p1) { - if(p1->num_valid_pages == 0) { + zone_info *tmp = info->used_log_zones_list, *prev = NULL, *sup; + while(tmp) { + bool flag = false; + if(tmp->num_valid_pages == 0) { //reset zone - nvme_zns_mgmt_send(info->fd, info->nsid, head->physical_zone_saddr, false, + nvme_zns_mgmt_send(info->fd, info->nsid, tmp->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); //Remove from used_log_zones - - //Append to free zones - } + info->no_of_used_log_zones--; + flag = true; + sup = tmp; //Mark tmp to sup + tmp = tmp->chain; //Move tmp to next + sup->chain = NULL; //Disconnect ptr chain + //Disconnect ptr from used log zone list + if(prev == NULL) + info->used_log_zones_list = tmp; + else + prev->chain = tmp; + + //Append sup to free zones + zone_info *itr = info->free_zones_list; + if(itr) { + while(itr->chain) + itr = itr->chain; + itr->chain = sup; + } else { + itr = sup; + } + + } + if(!flag) { + prev = tmp; + tmp = tmp->chain; + } } pthread_mutex_unlock(&info->zones_list_lock); - index = (index + 1) % info->data_zones_count; } return NULL; } -// static void update_cache(zns_info *info, metadata_map *metadata, -// void *buf, uint32_t size, uint32_t count_threshold) -// { -// metadata->data = NULL; -// metadata->size = 0; -// if (size <= BUF_SIZE - used_buf_size) { -// metadata->data = calloc(1, size); -// memcpy(metadata->data, buf, size); -// metadata->size = size; -// used_buf_size += size; -// return; -// } -// for (int i = 0; i < info->num_data_zones; ++i) { -// for (metadata_map *head = info->map[i]; head; head = head->next) { -// if (head->count < count_threshold && head->size >= size) { -// free(head->data); -// head->data = NULL; -// used_buf_size -= head->size; -// head->size = 0; -// metadata->data = calloc(1, size); -// memcpy(metadata->data, buf, size); -// metadata->size = size; -// used_buf_size += size; -// return; -// } -// } -// } -// } - -// static int lookup_map(user_zns_device *my_dev, -// uint64_t logical_addr, unsigned long long *physical_addr, -// void *buf, uint32_t size, bool *get) static int lookup_map(zns_info *info, uint64_t logical_addr, unsigned long long *physical_addr) { @@ -323,13 +336,6 @@ static int lookup_map(zns_info *info, while (head) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; - // ++head->count; - // if (head->size) { - // memcpy(buf, head->data, size); - // *get = true; - // } else { - // update_cache(info, head, buf, size, head->count); - // } pthread_mutex_unlock(&info->map[index]->logical_block_lock); return 0; } @@ -346,40 +352,6 @@ static int lookup_map(zns_info *info, -//FIXME: Check the func functianality -/* -static void update_curr_used_log_zone(zns_info *info, uint32_t num_lba) -{ - log_zone_info *log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; - if (log_zone->write_index + num_lba >= info->zone_num_pages) { - uint32_t curr_zone_num_lba = info->zone_num_pages - log_zone->write_index; - log_zone->write_index += curr_zone_num_lba; - log_zone->num_valid_pages += curr_zone_num_lba; - ++info->curr_used_log_zone_index; - if (info->curr_used_log_zone_index == (uint32_t)info->num_log_zones) { - --info->curr_used_log_zone_index; - // move current log_zone info to freed zone info place - log_zone->write_index = 0; - log_zone->num_valid_pages = 0; - // memset(log_zone->metadata, (int)NULL, info->zone_num_pages); - memset(log_zone->log_zone_index, -1, info->num_log_zones); - } - log_zone = &info->log_zones_info[info->curr_used_log_zone_index]; - log_zone->write_index += num_lba - curr_zone_num_lba; - log_zone->num_valid_pages += num_lba - curr_zone_num_lba; - } else { - log_zone->write_index += num_lba; - log_zone->num_valid_pages += num_lba; - } -} -*/ - - - - -// static void update_map(user_zns_device *my_dev, -// uint64_t logical_addr, unsigned long long physical_addr, -// void *buf, uint32_t size) static void update_map(zns_info *info, uint64_t logical_addr, unsigned long long physical_addr) { @@ -395,7 +367,8 @@ static void update_map(zns_info *info, map[index]->log_head->log_ptr = info->curr_log_zone; map[index]->log_head->logical_addr = logical_addr; map[index]->log_head->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + map[index]->log_head->next = NULL; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } @@ -405,6 +378,7 @@ static void update_map(zns_info *info, increment_zone_valid_page_counter(info->curr_log_zone); map[index]->log_head->log_ptr = info->curr_log_zone; map[index]->log_head->physical_addr = physical_addr; + map[index]->log_head->next = NULL; pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } @@ -417,7 +391,8 @@ static void update_map(zns_info *info, increment_zone_valid_page_counter(info->curr_log_zone); ptr->next->log_ptr = info->curr_log_zone; ptr->next->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + ptr->next->next = NULL; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } ptr = ptr->next; @@ -427,15 +402,9 @@ static void update_map(zns_info *info, ptr->next->log_ptr = info->curr_log_zone; ptr->next->logical_addr = logical_addr; ptr->next->physical_addr = physical_addr; + ptr->next->next = NULL; pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; - /* - log_zone->log_zone_index[log_zone->write_index] = index; - update_curr_used_log_zone(info, num_lba); - // free(head->next->data); - // used_buf_size -= head->next->size; - // update_cache(info, head->next, buf, size, 1); - */ } static int append_to_log_zone(zns_info *info, unsigned long long *physical_addr, @@ -527,27 +496,29 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->map = (logical_block_map **)calloc(info->data_zones_count, sizeof(logical_block_map *)); // set all zone index to free_zones_list - zone_info *head = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); + zone_info *head = (zone_info *)calloc(1, sizeof(zone_info)); head->physical_zone_saddr = 0; head->num_valid_pages = 0; zone_info *tmp = head; for (uint32_t i = 1; i < info->zns_zones_count; i++) { - tmp->chain = (zone_info *)calloc(info->zns_zones_count, sizeof(zone_info)); - tmp->chain->physical_zone_saddr = i*info->zns_pages_per_zone; + tmp->chain = (zone_info *)calloc(1, sizeof(zone_info)); + tmp->chain->physical_zone_saddr = i * info->zns_pages_per_zone; tmp->chain->num_valid_pages = 0; tmp = tmp->chain; } info->free_zones_list = head; - + printf("Free zones count %d\n", count(info->free_zones_list)); //Set current log zone to 0th zone info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; info->curr_log_zone->chain = NULL; - + printf("Free zones count %d\n", count(info->free_zones_list)); + for (uint32_t i = 0; i < info->data_zones_count; i++) { info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); info->map[i]->block_ptr = NULL; info->map[i]->log_head = NULL; + info->map[i]->logical_block_saddr = i * info->zns_pages_per_zone; } //Start GC @@ -563,7 +534,7 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous - int ret = lookup_map(info, address, &physical_addr); + int ret = lookup_map(info, address/info->zns_page_size, &physical_addr); if (ret) return ret; // if (!get) @@ -580,7 +551,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; //printf("Wait1\n"); - update_map(info, address, physical_addr); + update_map(info, address/info->zns_page_size, physical_addr); //printf("Wait2\n %d",info->no_of_used_log_zones); check_to_change_log_zone(info, physical_addr); //printf("Wait3\n"); From d57c25615c1aa0c5e467ac21e25f3635635c46e1 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 26 Sep 2022 13:00:19 +0000 Subject: [PATCH 025/101] Partial working patch --- src/m23-ftl/zns_device.cpp | 137 +++++++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 30 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index e7f12df..32a985c 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -26,6 +26,7 @@ SOFTWARE. #include #include #include +#include #include #include #include "zns_device.h" @@ -87,6 +88,7 @@ struct zns_info { logical_block_map **map; // Page mapped hashmap for log zone //Free zones array + uint32_t free_zones_count; zone_info *free_zones_list; }; @@ -147,6 +149,7 @@ void decrement_zone_valid_page_counter(zone_info *log) pthread_mutex_unlock(&log->page_counter_lock); } + //Change this func static void check_to_change_log_zone(zns_info *info, unsigned long long last_append_addr) { @@ -154,27 +157,85 @@ static void check_to_change_log_zone(zns_info *info, unsigned long long last_app //Check if current log zone is ended, then change to next free log zone; FIXME if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zns_pages_per_zone - 1) return; + pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; - if (!info->used_log_zones_list) { + if (info->used_log_zones_list == NULL) { info->used_log_zones_list = info->curr_log_zone; } else { zone_info *head = info->used_log_zones_list; - while(head->chain) + while(head->chain != NULL) head = head->chain; head->chain = info->curr_log_zone; } + info->curr_log_zone = NULL; ++info->no_of_used_log_zones; pthread_mutex_unlock(&info->zones_list_lock); + printf("Waiting\n"); while (info->no_of_used_log_zones == info->no_log_zones) continue; + printf("Done waiting\n"); //Dequeue from free_zone to curr_log_zone; + while(info->curr_log_zone == NULL) { pthread_mutex_lock(&info->zones_list_lock); + if (info->free_zones_count <= 1) { + pthread_mutex_unlock(&info->zones_list_lock); + //Sleep for some time + usleep(100); + continue; + } info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; info->curr_log_zone->chain = NULL; + info->free_zones_count--; pthread_mutex_unlock(&info->zones_list_lock); + break; + } + + if(info->curr_log_zone == NULL) { + printf("WARNING\n\n\n"); + } +} + + +void remove_log_zone_to_free_zone(zns_info *info) { + zone_info *tmp = info->used_log_zones_list, *prev = NULL, *sup; + while(tmp != NULL) { + bool flag = false; + if(tmp->num_valid_pages == 0) { + //reset zone + nvme_zns_mgmt_send(info->fd, info->nsid, tmp->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + + //Remove from used_log_zones + info->no_of_used_log_zones--; + flag = true; + sup = tmp; //Mark tmp to sup + tmp = tmp->chain; //Move tmp to next + sup->chain = NULL; //Disconnect ptr chain + //Disconnect ptr from used log zone list + if(prev == NULL) + info->used_log_zones_list = tmp; + else + prev->chain = tmp; + + //Append sup to free zones + zone_info *itr = info->free_zones_list; + if(itr) { + while(itr->chain) + itr = itr->chain; + itr->chain = sup; + } else { + info->free_zones_list = sup; + } + info->free_zones_count++; + } + if(!flag) { + prev = tmp; + tmp = tmp->chain; + } + } } void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) @@ -183,40 +244,51 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) logpage_map *ptr = map->log_head; bool flag = false; uint64_t paddr; - while (ptr) { + while (ptr != NULL) { if (ptr->logical_addr == map->logical_block_saddr + offset) { paddr = ptr->physical_addr; decrement_zone_valid_page_counter(ptr->log_ptr); flag = true; break; } - ptr = ptr->next; + ptr = ptr->next; } //Get block - if((!flag)&&(map->block_ptr)){ + if((!flag) && (map->block_ptr != NULL)){ flag = true; paddr = map->block_ptr->physical_zone_saddr + offset; } void *buffer; buffer = (void *)calloc(1,info->zns_page_size); + //Do nvme read on paddr if(flag) { read_from_nvme(info, paddr, buffer, info->zns_page_size); - //Do nvme append new_zone->saddr - append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); } - free(buffer); + //Do nvme append new_zone->saddr; Write invalid pages as well! + append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); increment_zone_valid_page_counter(new_zone); + + free(buffer); } + + //Free log_head and block_ptr of the logical block logpage_map *ptr = map->log_head; - while(ptr) { + while(ptr != NULL) { logpage_map *tmp = ptr; ptr = ptr->next; free(tmp); } map->log_head = NULL; + + //Zero the old block zone and change to new zone + if (map->block_ptr != NULL) { + nvme_zns_mgmt_send(info->fd, info->nsid, map->block_ptr->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + map->block_ptr->num_valid_pages = 0; + } map->block_ptr = new_zone; } @@ -228,20 +300,22 @@ void *gc_thread(void *info_ptr) //Check condition while (info->no_of_used_log_zones < info->gc_trigger) continue; - + + printf("Condition\n"); logical_block_map *ptr = info->map[index]; while(ptr->log_head == NULL) { index = (index + 1) % info->data_zones_count; ptr = info->map[index]; continue; } - + printf("Douing gc\n"); zone_info *free_zone, *old_zone; pthread_mutex_lock(&info->zones_list_lock); //Get free zone and nullify the chain free_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; free_zone->chain = NULL; + info->free_zones_count--; pthread_mutex_unlock(&info->zones_list_lock); @@ -249,31 +323,30 @@ void *gc_thread(void *info_ptr) //Merge the logical block to data zone old_zone = ptr->block_ptr; merge(info, ptr, free_zone); - ptr->log_head = NULL; - ptr->block_ptr = free_zone; pthread_mutex_unlock(&ptr->logical_block_lock); - if(old_zone) - old_zone->num_valid_pages = 0; pthread_mutex_lock(&info->zones_list_lock); //Check used log zone valid counter if zero reset and add to free zone list //Append old data zone to free zones list zone_info *head = info->free_zones_list; - if(old_zone) { - if(head) { - while(head->chain) + if(old_zone != NULL) { + if(head != NULL) { + while(head->chain != NULL) head = head->chain; head->chain = old_zone; } else { - head = old_zone; + info->free_zones_list = old_zone; } + info->free_zones_count++; } //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list //Reset if used log zone : if valid pages is reference is zero zone_info *tmp = info->used_log_zones_list, *prev = NULL, *sup; - while(tmp) { + //printf("exec\n"); + while(tmp != NULL) { bool flag = false; + //printf("Page count %d\n",tmp->num_valid_pages); if(tmp->num_valid_pages == 0) { //reset zone nvme_zns_mgmt_send(info->fd, info->nsid, tmp->physical_zone_saddr, false, @@ -291,6 +364,7 @@ void *gc_thread(void *info_ptr) else prev->chain = tmp; + //Append sup to free zones zone_info *itr = info->free_zones_list; if(itr) { @@ -298,15 +372,16 @@ void *gc_thread(void *info_ptr) itr = itr->chain; itr->chain = sup; } else { - itr = sup; + info->free_zones_list = sup; } - + info->free_zones_count++; } if(!flag) { prev = tmp; tmp = tmp->chain; } } + //printf("\n"); pthread_mutex_unlock(&info->zones_list_lock); index = (index + 1) % info->data_zones_count; } @@ -325,7 +400,7 @@ static int lookup_map(zns_info *info, //Search in log logpage_map *head = info->map[index]->log_head; - while (head) { + while (head != NULL) { if (head->logical_addr == logical_addr) { *physical_addr = head->physical_addr; pthread_mutex_unlock(&info->map[index]->logical_block_lock); @@ -338,7 +413,6 @@ static int lookup_map(zns_info *info, uint32_t offset = offset_function(logical_addr, info->zns_pages_per_zone); *physical_addr = info->map[index]->block_ptr->physical_zone_saddr + offset; pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return 0; } @@ -350,9 +424,10 @@ static void update_map(zns_info *info, int index = hash_function(logical_addr, info->zns_pages_per_zone); logical_block_map **map = info->map; //Fill in hashmap - + //printf("Added to %d\n",index); //Lock for the update in log pthread_mutex_lock(&info->map[index]->logical_block_lock); + if (map[index]->log_head == NULL) { map[index]->log_head = (logpage_map *)calloc(1, sizeof(logpage_map)); increment_zone_valid_page_counter(info->curr_log_zone); @@ -379,7 +454,7 @@ static void update_map(zns_info *info, while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); + decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); increment_zone_valid_page_counter(info->curr_log_zone); ptr->next->log_ptr = info->curr_log_zone; ptr->next->physical_addr = physical_addr; @@ -492,11 +567,13 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * head->physical_zone_saddr = 0; head->num_valid_pages = 0; zone_info *tmp = head; + info->free_zones_count = 1; for (uint32_t i = 1; i < info->zns_zones_count; i++) { tmp->chain = (zone_info *)calloc(1, sizeof(zone_info)); tmp->chain->physical_zone_saddr = i * info->zns_pages_per_zone; tmp->chain->num_valid_pages = 0; - tmp = tmp->chain; + tmp = tmp->chain; + info->free_zones_count++; } info->free_zones_list = head; @@ -504,7 +581,9 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; info->curr_log_zone->chain = NULL; + info->free_zones_count--; + //Set map for logical blocks for (uint32_t i = 0; i < info->data_zones_count; i++) { info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); info->map[i]->block_ptr = NULL; @@ -536,16 +615,14 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { + //FIXME: Add lock for entier operation unsigned long long physical_addr = 0; zns_info *info = (zns_info *)my_dev->_private; int ret = append_to_log_zone(info, &physical_addr, buffer, size); if (ret) return ret; - //printf("Wait1\n"); update_map(info, address/info->zns_page_size, physical_addr); - //printf("Wait2\n %d",info->no_of_used_log_zones); check_to_change_log_zone(info, physical_addr); - //printf("Wait3\n"); return 0; } From 411046c86ec5a43443378105133db0ffb10affcc Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 26 Sep 2022 15:58:34 +0000 Subject: [PATCH 026/101] Working patch 2 --- src/m23-ftl/zns_device.cpp | 582 ++++++++++++++++++------------------- 1 file changed, 278 insertions(+), 304 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index e7f12df..117358b 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -28,6 +28,7 @@ SOFTWARE. #include #include #include +#include #include "zns_device.h" extern "C" { @@ -37,25 +38,24 @@ extern "C" { //Structure for zone in zns struct zone_info { - pthread_mutex_t page_counter_lock; uint32_t num_valid_pages; // counter unsigned long long physical_zone_saddr; + pthread_mutex_t page_counter_lock; zone_info *chain; //Chained in free_zones and used_log_zones_list }; //Structure for pagemap in log -struct logpage_map { +struct page_map { uint64_t logical_addr; unsigned long long physical_addr; - zone_info *log_ptr; - logpage_map *next; //Logpage map for each logical block + zone_info *page_zone_info; + page_map *next; //Logpage map for each logical block }; - //Structure for logical block [contains page map and block map] struct logical_block_map { uint64_t logical_block_saddr; - logpage_map *log_head; //Log page mapping for this logical block + page_map *page_maps; //Log page mapping for this logical block zone_info *block_ptr; //Point to zone_info //TODO: LOCK the access pthread_mutex_t logical_block_lock; @@ -63,7 +63,7 @@ struct logical_block_map { struct zns_info { // Values from init parameters - uint32_t no_log_zones; + int num_log_zones; int gc_trigger; char device_name; pthread_t gc_thread_id; @@ -73,13 +73,13 @@ struct zns_info { int fd; unsigned nsid; uint32_t zns_page_size; - uint32_t zns_pages_per_zone; - uint32_t zns_zones_count; - uint32_t data_zones_count; + uint32_t zone_num_pages; + uint32_t zns_num_zones; + uint32_t num_data_zones; pthread_mutex_t zones_list_lock; // Log zone maintainance - uint32_t no_of_used_log_zones; + int num_used_log_zones; zone_info *used_log_zones_list; // let the new log zone at the end of the array zone_info *curr_log_zone; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity @@ -90,19 +90,33 @@ struct zns_info { zone_info *free_zones_list; }; +int count(zone_info *ptr) +{ + int count = 0; + while (ptr) { + ++count; + ptr = ptr->chain; + } + return count; +} -static int read_from_nvme(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size) +void increment_zone_valid_page_counter(zone_info *log) { - unsigned short number_of_pages = size / info->zns_page_size - 1; - nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, - 0, 0, 0, 0, 0, size, buffer, 0, NULL); - //ss_nvme_show_status(errno); - return errno; + pthread_mutex_lock(&log->page_counter_lock); + ++log->num_valid_pages; + pthread_mutex_unlock(&log->page_counter_lock); +} + +void decrement_zone_valid_page_counter(zone_info *log) +{ + pthread_mutex_lock(&log->page_counter_lock); + --log->num_valid_pages; + pthread_mutex_unlock(&log->page_counter_lock); } -static int append_to_zone(zns_info *info, unsigned long long saddr, unsigned long long *physical_addr, - void *buffer, uint32_t size) +static int append_to_zone(zns_info *info, unsigned long long saddr, + unsigned long long *physical_addr, + void *buffer, uint32_t size) { unsigned short number_of_pages = size / info->zns_page_size - 1; //calc from size and page_size //TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. @@ -112,317 +126,276 @@ static int append_to_zone(zns_info *info, unsigned long long saddr, unsigned lon return errno; } - -static inline int hash_function(uint64_t key, uint32_t base) +static int read_from_nvme(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size) { - return key / base; + unsigned short number_of_pages = size / info->zns_page_size - 1; + nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, + 0, 0, 0, 0, 0, size, buffer, 0, NULL); + //ss_nvme_show_status(errno); + return errno; } -static inline int offset_function(uint64_t key, uint32_t base) +zone_info *merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { - return key % base; + page_map *ptr = map->page_maps; + zone_info *old_used_zone = map->block_ptr; + for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { + uint64_t page_physical_addr = 0UL; + bool have_data = false; + if (old_used_zone) { + have_data = true; + page_physical_addr = old_used_zone->physical_zone_saddr + offset; + decrement_zone_valid_page_counter(old_used_zone); + } + if (ptr && ptr->logical_addr == map->logical_block_saddr + offset) { + page_physical_addr = ptr->physical_addr; + decrement_zone_valid_page_counter(ptr->page_zone_info); + have_data = true; + ptr = ptr->next; + } + + char *buffer = (char *)calloc(info->zns_page_size, sizeof(char)); + unsigned long long physical_addr = 0ULL; + //Do nvme read on paddr + if (have_data) { + read_from_nvme(info, page_physical_addr, buffer, info->zns_page_size); + } + //Do nvme append new_zone->saddr + append_to_zone(info, new_zone->physical_zone_saddr, &physical_addr, + buffer, info->zns_page_size); + free(buffer); + increment_zone_valid_page_counter(new_zone); + } + while (map->page_maps) { + page_map *tmp = map->page_maps; + map->page_maps = map->page_maps->next; + free(tmp); + } + + map->block_ptr = new_zone; + + if (old_used_zone) { + nvme_zns_mgmt_send(info->fd, info->nsid, + old_used_zone->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + zone_info *head = info->free_zones_list; + if (head) { + while (head->chain) + head = head->chain; + head->chain = old_used_zone; + } else { + info->free_zones_list = old_used_zone; + } + } + return old_used_zone; } +void *gc_thread(void *info_ptr) +{ + zns_info *info = (zns_info *)info_ptr; + uint32_t index = 0; + while (info->run_gc) { + //Check condition + while (info->num_used_log_zones < info->gc_trigger) + continue; + + logical_block_map *ptr = info->map[index]; + while (!ptr->page_maps) { + index = (index + 1) % info->num_data_zones; + ptr = info->map[index]; + continue; + } -int count(zone_info *ptr) { - int count =0; - while(ptr) { - count++; - ptr=ptr->chain; + pthread_mutex_lock(&info->zones_list_lock); + //Get free zone and nullify the chain + zone_info *free_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + free_zone->chain = NULL; + pthread_mutex_unlock(&info->zones_list_lock); + + pthread_mutex_lock(&ptr->logical_block_lock); + //Merge the logical block to data zone + zone_info *old_zone = merge(info, ptr, free_zone); + pthread_mutex_unlock(&ptr->logical_block_lock); + + pthread_mutex_lock(&info->zones_list_lock); + //Check used log zone valid counter if zero reset and add to free zone list + //Append old data zone to free zones list + //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list + //Reset if used log zone : if valid pages is reference is zero + for (zone_info *prev = NULL, *free = NULL, + *tmp = info->used_log_zones_list; tmp;) { + if(tmp->num_valid_pages <= 0) { + free = tmp; + tmp = tmp->chain; + if (!prev) { + info->used_log_zones_list = tmp; + } else { + prev->chain = tmp; + } + free->chain = NULL; + //reset + nvme_zns_mgmt_send(info->fd, info->nsid, + free->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + //Remove from used_log_zones + --info->num_used_log_zones; + //Append sup to free zones + zone_info *itr = info->free_zones_list; + if(itr) { + while(itr->chain) + itr = itr->chain; + itr->chain = free; + } else { + itr = free; + } + } else { + prev = tmp; + tmp = tmp->chain; + } + } + pthread_mutex_unlock(&info->zones_list_lock); + index = (index + 1) % info->num_data_zones; } - return count; + return NULL; } -void increment_zone_valid_page_counter(zone_info *log) +static inline int hash_function(uint32_t key, uint32_t base) { - pthread_mutex_lock(&log->page_counter_lock); - ++log->num_valid_pages; - pthread_mutex_unlock(&log->page_counter_lock); + return key / base; } -void decrement_zone_valid_page_counter(zone_info *log) +static inline int offset_function(uint32_t key, uint32_t base) { - pthread_mutex_lock(&log->page_counter_lock); - --log->num_valid_pages; - pthread_mutex_unlock(&log->page_counter_lock); + return key % base; } //Change this func -static void check_to_change_log_zone(zns_info *info, unsigned long long last_append_addr) +static void check_to_change_log_zone(zns_info *info, + unsigned long long last_append_addr) { //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition //Check if current log zone is ended, then change to next free log zone; FIXME - if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zns_pages_per_zone - 1) + if (last_append_addr - info->curr_log_zone->physical_zone_saddr < + info->zone_num_pages - 1) return; pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; if (!info->used_log_zones_list) { - info->used_log_zones_list = info->curr_log_zone; + info->used_log_zones_list = info->curr_log_zone; } else { zone_info *head = info->used_log_zones_list; while(head->chain) head = head->chain; head->chain = info->curr_log_zone; } - ++info->no_of_used_log_zones; + ++info->num_used_log_zones; + info->curr_log_zone = NULL; pthread_mutex_unlock(&info->zones_list_lock); - while (info->no_of_used_log_zones == info->no_log_zones) - continue; + while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; + while(info->curr_log_zone == NULL) { pthread_mutex_lock(&info->zones_list_lock); - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - info->curr_log_zone->chain = NULL; - pthread_mutex_unlock(&info->zones_list_lock); -} - -void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) -{ - for (uint32_t offset = 0; offset < info->zns_pages_per_zone; ++offset) { - logpage_map *ptr = map->log_head; - bool flag = false; - uint64_t paddr; - while (ptr) { - if (ptr->logical_addr == map->logical_block_saddr + offset) { - paddr = ptr->physical_addr; - decrement_zone_valid_page_counter(ptr->log_ptr); - flag = true; - break; - } - ptr = ptr->next; - } - - //Get block - if((!flag)&&(map->block_ptr)){ - flag = true; - paddr = map->block_ptr->physical_zone_saddr + offset; - } - - void *buffer; - buffer = (void *)calloc(1,info->zns_page_size); - //Do nvme read on paddr - if(flag) { - read_from_nvme(info, paddr, buffer, info->zns_page_size); - //Do nvme append new_zone->saddr - append_to_zone(info, new_zone->physical_zone_saddr, NULL, buffer, info->zns_page_size); - } - free(buffer); - increment_zone_valid_page_counter(new_zone); - } - logpage_map *ptr = map->log_head; - while(ptr) { - logpage_map *tmp = ptr; - ptr = ptr->next; - free(tmp); - } - map->log_head = NULL; - map->block_ptr = new_zone; -} - -void *gc_thread(void *info_ptr) -{ - zns_info *info = (zns_info *)info_ptr; - uint32_t index = 0; - while (info->run_gc) { - //Check condition - while (info->no_of_used_log_zones < info->gc_trigger) - continue; - - logical_block_map *ptr = info->map[index]; - while(ptr->log_head == NULL) { - index = (index + 1) % info->data_zones_count; - ptr = info->map[index]; - continue; - } - - zone_info *free_zone, *old_zone; - pthread_mutex_lock(&info->zones_list_lock); - //Get free zone and nullify the chain - free_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - free_zone->chain = NULL; - pthread_mutex_unlock(&info->zones_list_lock); - - - pthread_mutex_lock(&ptr->logical_block_lock); - //Merge the logical block to data zone - old_zone = ptr->block_ptr; - merge(info, ptr, free_zone); - ptr->log_head = NULL; - ptr->block_ptr = free_zone; - pthread_mutex_unlock(&ptr->logical_block_lock); - - if(old_zone) - old_zone->num_valid_pages = 0; - pthread_mutex_lock(&info->zones_list_lock); - //Check used log zone valid counter if zero reset and add to free zone list - //Append old data zone to free zones list - zone_info *head = info->free_zones_list; - if(old_zone) { - if(head) { - while(head->chain) - head = head->chain; - head->chain = old_zone; - } else { - head = old_zone; - } - } - - //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list - //Reset if used log zone : if valid pages is reference is zero - zone_info *tmp = info->used_log_zones_list, *prev = NULL, *sup; - while(tmp) { - bool flag = false; - if(tmp->num_valid_pages == 0) { - //reset zone - nvme_zns_mgmt_send(info->fd, info->nsid, tmp->physical_zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0, NULL); - - //Remove from used_log_zones - info->no_of_used_log_zones--; - flag = true; - sup = tmp; //Mark tmp to sup - tmp = tmp->chain; //Move tmp to next - sup->chain = NULL; //Disconnect ptr chain - //Disconnect ptr from used log zone list - if(prev == NULL) - info->used_log_zones_list = tmp; - else - prev->chain = tmp; - - //Append sup to free zones - zone_info *itr = info->free_zones_list; - if(itr) { - while(itr->chain) - itr = itr->chain; - itr->chain = sup; - } else { - itr = sup; - } - - } - if(!flag) { - prev = tmp; - tmp = tmp->chain; - } - } - pthread_mutex_unlock(&info->zones_list_lock); - index = (index + 1) % info->data_zones_count; + if (count(info->free_zones_list) > 1) { + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + info->curr_log_zone->chain = NULL; + info->curr_log_zone->num_valid_pages = 0; } - return NULL; -} - - -static int lookup_map(zns_info *info, - uint64_t logical_addr, unsigned long long *physical_addr) -{ - int index = hash_function(logical_addr, info->zns_pages_per_zone); - - - //Lock the logical block - pthread_mutex_lock(&info->map[index]->logical_block_lock); - - //Search in log - logpage_map *head = info->map[index]->log_head; - while (head) { - if (head->logical_addr == logical_addr) { - *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return 0; - } - head = head->next; + pthread_mutex_unlock(&info->zones_list_lock); + usleep(100); } - - //If not present provide data block addr - uint32_t offset = offset_function(logical_addr, info->zns_pages_per_zone); - *physical_addr = info->map[index]->block_ptr->physical_zone_saddr + offset; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); - - return 0; } - - static void update_map(zns_info *info, - uint64_t logical_addr, unsigned long long physical_addr) + uint32_t logical_addr, unsigned long long physical_addr) { - int index = hash_function(logical_addr, info->zns_pages_per_zone); + int index = hash_function(logical_addr, info->zone_num_pages); logical_block_map **map = info->map; + increment_zone_valid_page_counter(info->curr_log_zone); //Fill in hashmap //Lock for the update in log pthread_mutex_lock(&info->map[index]->logical_block_lock); - if (map[index]->log_head == NULL) { - map[index]->log_head = (logpage_map *)calloc(1, sizeof(logpage_map)); - increment_zone_valid_page_counter(info->curr_log_zone); - map[index]->log_head->log_ptr = info->curr_log_zone; - map[index]->log_head->logical_addr = logical_addr; - map[index]->log_head->physical_addr = physical_addr; - map[index]->log_head->next = NULL; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + if (map[index]->page_maps == NULL) { + map[index]->page_maps = (page_map *)calloc(1, sizeof(page_map)); + map[index]->page_maps->page_zone_info = info->curr_log_zone; + map[index]->page_maps->logical_addr = logical_addr; + map[index]->page_maps->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } - if (map[index]->log_head->logical_addr == logical_addr) { + if (map[index]->page_maps->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); - increment_zone_valid_page_counter(info->curr_log_zone); - map[index]->log_head->log_ptr = info->curr_log_zone; - map[index]->log_head->physical_addr = physical_addr; - map[index]->log_head->next = NULL; + decrement_zone_valid_page_counter(map[index]->page_maps->page_zone_info); + map[index]->page_maps->page_zone_info = info->curr_log_zone; + map[index]->page_maps->physical_addr = physical_addr; pthread_mutex_unlock(&info->map[index]->logical_block_lock); return; } - logpage_map *ptr = map[index]->log_head; + page_map *ptr = map[index]->page_maps; while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(map[index]->log_head->log_ptr); - increment_zone_valid_page_counter(info->curr_log_zone); - ptr->next->log_ptr = info->curr_log_zone; - ptr->next->physical_addr = physical_addr; - ptr->next->next = NULL; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return; + decrement_zone_valid_page_counter(ptr->next->page_zone_info); + ptr->next->page_zone_info = info->curr_log_zone; + ptr->next->physical_addr = physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + return; } ptr = ptr->next; } - ptr->next = (logpage_map *)calloc(1, sizeof(logpage_map)); - increment_zone_valid_page_counter(info->curr_log_zone); - ptr->next->log_ptr = info->curr_log_zone; + ptr->next = (page_map *)calloc(1, sizeof(page_map)); + ptr->next->page_zone_info = info->curr_log_zone; ptr->next->logical_addr = logical_addr; ptr->next->physical_addr = physical_addr; - ptr->next->next = NULL; pthread_mutex_unlock(&info->map[index]->logical_block_lock); - return; } -static int append_to_log_zone(zns_info *info, unsigned long long *physical_addr, - void *buffer, uint32_t size) +static int lookup_map(zns_info *info, + uint32_t logical_page_addr, unsigned long long *physical_addr) { - unsigned short number_of_pages = size / info->zns_page_size - 1; //calc from size and page_size - //TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, number_of_pages, - 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); - //ss_nvme_show_status(errno); - return errno; + int index = hash_function(logical_page_addr, info->zone_num_pages); + + //Lock the logical block + pthread_mutex_lock(&info->map[index]->logical_block_lock); + + //Search in log + page_map *head = info->map[index]->page_maps; + while (head) { + if (head->logical_addr == logical_page_addr) { + *physical_addr = head->physical_addr; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + return 0; + } + head = head->next; + } + + //If not present provide data block addr + uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); + *physical_addr = info->map[index]->block_ptr->physical_zone_saddr + offset; + pthread_mutex_unlock(&info->map[index]->logical_block_lock); + + return 0; } -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) +int init_ss_zns_device(struct zdev_init_params *params, + struct user_zns_device **my_dev) { //Assign the private ptr to zns_info *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; - + // set num_log_zones - info->no_log_zones = params->log_zones; + info->num_log_zones = params->log_zones; // set gc_trigger info->gc_trigger = params->gc_wmark; - // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -467,49 +440,50 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->zns_zones_count = (*my_dev)->tparams.zns_num_zones; + info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; // set num_data_zones = zns_num_zones - num_log_zones - info->data_zones_count = info->zns_zones_count - info->no_log_zones; + info->num_data_zones = info->zns_num_zones - info->num_log_zones; // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); - info->zns_pages_per_zone = data.lbafe[ns.flbas & 0xF].zsze; - + info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zns_pages_per_zone * + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * (*my_dev)->tparams.zns_lba_size; // set user capacity bytes = #data_zones * zone_capacity - (*my_dev)->capacity_bytes = info->data_zones_count * (*my_dev)->tparams.zns_zone_capacity; + (*my_dev)->capacity_bytes = (info->num_data_zones) * + (*my_dev)->tparams.zns_zone_capacity; // set log zone page mapped hashmap size to num_data_zones - info->map = (logical_block_map **)calloc(info->data_zones_count, sizeof(logical_block_map *)); - + info->map = (logical_block_map **)calloc(info->num_data_zones, + sizeof(logical_block_map *)); + // init zones_list_lock + pthread_mutex_init(&info->zones_list_lock, NULL); // set all zone index to free_zones_list - zone_info *head = (zone_info *)calloc(1, sizeof(zone_info)); - head->physical_zone_saddr = 0; - head->num_valid_pages = 0; - zone_info *tmp = head; - for (uint32_t i = 1; i < info->zns_zones_count; i++) { - tmp->chain = (zone_info *)calloc(1, sizeof(zone_info)); - tmp->chain->physical_zone_saddr = i * info->zns_pages_per_zone; - tmp->chain->num_valid_pages = 0; - tmp = tmp->chain; + info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); + pthread_mutex_init(&info->free_zones_list->page_counter_lock, NULL); + zone_info *tmp = info->free_zones_list; + for (uint32_t i = 1; i < info->zns_num_zones; ++i) { + tmp->chain = (zone_info *)calloc(1, sizeof(zone_info)); + tmp->chain->physical_zone_saddr = i * info->zone_num_pages; + pthread_mutex_init(&tmp->chain->page_counter_lock, NULL); + tmp = tmp->chain; } - info->free_zones_list = head; //Set current log zone to 0th zone info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; info->curr_log_zone->chain = NULL; + info->curr_log_zone->num_valid_pages = 0; - for (uint32_t i = 0; i < info->data_zones_count; i++) { + for (uint32_t i = 0; i < info->num_data_zones; ++i) { info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); - info->map[i]->block_ptr = NULL; - info->map[i]->log_head = NULL; - info->map[i]->logical_block_saddr = i * info->zns_pages_per_zone; + info->map[i]->logical_block_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->map[i]->logical_block_lock, NULL); } //Start GC @@ -521,11 +495,10 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { - unsigned long long physical_addr = 0; + unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; - //FIXME: Proision for contiguos block read, but not written contiguous - int ret = lookup_map(info, address/info->zns_page_size, &physical_addr); + int ret = lookup_map(info, address / info->zns_page_size, &physical_addr); if (ret) return ret; // if (!get) @@ -536,14 +509,15 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { - unsigned long long physical_addr = 0; + unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; - int ret = append_to_log_zone(info, &physical_addr, buffer, size); + int ret = append_to_zone(info, info->curr_log_zone->physical_zone_saddr, + &physical_addr, buffer, size); if (ret) return ret; //printf("Wait1\n"); - update_map(info, address/info->zns_page_size, physical_addr); - //printf("Wait2\n %d",info->no_of_used_log_zones); + update_map(info, address / info->zns_page_size, physical_addr); + //printf("Wait2\n %d",info->num_used_log_zones); check_to_change_log_zone(info, physical_addr); //printf("Wait3\n"); return 0; @@ -551,7 +525,6 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int deinit_ss_zns_device(struct user_zns_device *my_dev) { - zns_info *info = (zns_info *)my_dev->_private; //Kill gc @@ -560,39 +533,40 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) logical_block_map **map = info->map; //free hashmap - for (uint32_t i = 0; i < info->data_zones_count; i++) { + for (uint32_t i = 0; i < info->num_data_zones; ++i) { if (map[i] == NULL) continue; //Clear all log heads for a logical block - logpage_map *head = map[i]->log_head; - while (head) { - logpage_map *tmp = head->next; - free(head); - head = tmp; + while (map[i]->page_maps) { + page_map *tmp = map[i]->page_maps; + map[i]->page_maps = map[i]->page_maps->next; + free(tmp); } - - free(map[i]->block_ptr); - + if (map[i]->block_ptr) + free(map[i]->block_ptr); + pthread_mutex_destroy(&map[i]->logical_block_lock); //Clear map[i] free(map[i]); } free(map); - zone_info *head = info->used_log_zones_list; - while (head) { - zone_info *tmp = head->chain; - free(head); - head = tmp; + while (info->used_log_zones_list) { + zone_info *tmp = info->used_log_zones_list; + info->used_log_zones_list = info->used_log_zones_list->chain; + pthread_mutex_destroy(&tmp->page_counter_lock); + free(tmp); } - head = info->free_zones_list; - while(head) { - zone_info *tmp = head->chain; - free(head); - head = tmp; + while (info->free_zones_list) { + zone_info *tmp = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + pthread_mutex_destroy(&tmp->page_counter_lock); + free(tmp); } + pthread_mutex_destroy(&info->curr_log_zone->page_counter_lock); free(info->curr_log_zone); + pthread_mutex_destroy(&info->zones_list_lock); free(my_dev->_private); free(my_dev); return 0; From 5f9945ca75d791a54558b02e032a8ff008c23468 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 26 Sep 2022 17:44:39 +0000 Subject: [PATCH 027/101] The working patch --- src/m23-ftl/zns_device.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index d5d7cd9..a4c8989 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -138,9 +138,9 @@ static int read_from_nvme(zns_info *info, unsigned long long physical_addr, return errno; } -zone_info *merge(zns_info *info, logical_block_map *map, zone_info *new_zone) +void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { - page_map *ptr = map->page_maps; + page_map *head = map->page_maps; zone_info *old_used_zone = map->block_ptr; for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { uint64_t page_physical_addr = 0UL; @@ -150,10 +150,14 @@ zone_info *merge(zns_info *info, logical_block_map *map, zone_info *new_zone) page_physical_addr = old_used_zone->physical_zone_saddr + offset; decrement_zone_valid_page_counter(old_used_zone); } - if (ptr && ptr->logical_addr == map->logical_block_saddr + offset) { - page_physical_addr = ptr->physical_addr; - decrement_zone_valid_page_counter(ptr->page_zone_info); - have_data = true; + + page_map *ptr = head; + while (ptr) { + if(ptr->logical_addr == map->logical_block_saddr + offset) { + page_physical_addr = ptr->physical_addr; + decrement_zone_valid_page_counter(ptr->page_zone_info); + have_data = true; + } ptr = ptr->next; } @@ -190,7 +194,6 @@ zone_info *merge(zns_info *info, logical_block_map *map, zone_info *new_zone) info->free_zones_list = old_used_zone; } } - return old_used_zone; } void *gc_thread(void *info_ptr) @@ -204,7 +207,8 @@ void *gc_thread(void *info_ptr) logical_block_map *ptr = info->map[index]; while (!ptr->page_maps) { - index = (index + 1) % info->num_data_zones; + printf("Here\n"); + index = (index + 1) % info->num_data_zones; ptr = info->map[index]; continue; } @@ -218,7 +222,7 @@ void *gc_thread(void *info_ptr) pthread_mutex_lock(&ptr->logical_block_lock); //Merge the logical block to data zone - zone_info *old_zone = merge(info, ptr, free_zone); + merge(info, ptr, free_zone); pthread_mutex_unlock(&ptr->logical_block_lock); pthread_mutex_lock(&info->zones_list_lock); From de6960b2976a9e02d03576de9c23584cf926d8c6 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 26 Sep 2022 18:16:02 +0000 Subject: [PATCH 028/101] Bug fixes in working patch --- src/m23-ftl/zns_device.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a4c8989..8beaafa 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -202,17 +202,20 @@ void *gc_thread(void *info_ptr) uint32_t index = 0; while (info->run_gc) { //Check condition - while (info->num_used_log_zones < info->gc_trigger) + while ((info->num_used_log_zones < info->gc_trigger)&&(info->run_gc)) continue; + logical_block_map *ptr = info->map[index]; - while (!ptr->page_maps) { - printf("Here\n"); + while((!ptr->page_maps)&&(info->run_gc)) { index = (index + 1) % info->num_data_zones; ptr = info->map[index]; continue; } + if(!info->run_gc) + break; + pthread_mutex_lock(&info->zones_list_lock); //Get free zone and nullify the chain zone_info *free_zone = info->free_zones_list; @@ -537,7 +540,8 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) //Kill gc info->run_gc = false; pthread_join(info->gc_thread_id, NULL); - + + logical_block_map **map = info->map; //free hashmap for (uint32_t i = 0; i < info->num_data_zones; ++i) { From b6d8a695af490e24cdc281b10cae74219d3def12 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 26 Sep 2022 21:09:19 +0000 Subject: [PATCH 029/101] fixed some bugs --- src/m23-ftl/zns_device.cpp | 242 ++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 121 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 8beaafa..3f716be 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -85,22 +85,23 @@ struct zns_info { zone_info *curr_log_zone; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity //Logical to Physical mapping page and block - logical_block_map **map; // Page mapped hashmap for log zone + logical_block_map **logical_block_maps; // Page mapped hashmap for log zone //Free zones array - uint32_t free_zones_count; + uint32_t num_free_zones; zone_info *free_zones_list; + zone_info *free_zones_list_tail; }; -int count(zone_info *ptr) -{ - int count = 0; - while (ptr) { - ++count; - ptr = ptr->chain; - } - return count; -} +// int count(zone_info *ptr) +// { +// int count = 0; +// while (ptr) { +// ++count; +// ptr = ptr->chain; +// } +// return count; +// } void increment_zone_valid_page_counter(zone_info *log) { @@ -140,34 +141,28 @@ static int read_from_nvme(zns_info *info, unsigned long long physical_addr, void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { - page_map *head = map->page_maps; zone_info *old_used_zone = map->block_ptr; for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { - uint64_t page_physical_addr = 0UL; + unsigned long long page_physical_addr = 0ULL; bool have_data = false; if (old_used_zone) { have_data = true; page_physical_addr = old_used_zone->physical_zone_saddr + offset; decrement_zone_valid_page_counter(old_used_zone); } - - page_map *ptr = head; - while (ptr) { - if(ptr->logical_addr == map->logical_block_saddr + offset) { - page_physical_addr = ptr->physical_addr; - decrement_zone_valid_page_counter(ptr->page_zone_info); - have_data = true; - } - ptr = ptr->next; + for (page_map *head = map->page_maps; head; head = head->next) { + if (head->logical_addr == map->logical_block_saddr + offset) { + page_physical_addr = head->physical_addr; + decrement_zone_valid_page_counter(head->page_zone_info); + have_data = true; + } } - char *buffer = (char *)calloc(info->zns_page_size, sizeof(char)); - unsigned long long physical_addr = 0ULL; //Do nvme read on paddr - if (have_data) { + if (have_data) read_from_nvme(info, page_physical_addr, buffer, info->zns_page_size); - } //Do nvme append new_zone->saddr + unsigned long long physical_addr = 0ULL; append_to_zone(info, new_zone->physical_zone_saddr, &physical_addr, buffer, info->zns_page_size); free(buffer); @@ -178,21 +173,23 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) map->page_maps = map->page_maps->next; free(tmp); } - map->block_ptr = new_zone; - if (old_used_zone) { nvme_zns_mgmt_send(info->fd, info->nsid, - old_used_zone->physical_zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0, NULL); - zone_info *head = info->free_zones_list; - if (head) { - while (head->chain) - head = head->chain; - head->chain = old_used_zone; + old_used_zone->physical_zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0, NULL); + // zone_info *head = info->free_zones_list; + if (info->free_zones_list) { + // while (head->chain) + // head = head->chain; + // head->chain = old_used_zone; + info->free_zones_list_tail->chain = old_used_zone; + info->free_zones_list_tail = info->free_zones_list_tail->chain; } else { info->free_zones_list = old_used_zone; + info->free_zones_list_tail = old_used_zone; } + ++info->num_free_zones; } } @@ -202,30 +199,31 @@ void *gc_thread(void *info_ptr) uint32_t index = 0; while (info->run_gc) { //Check condition - while ((info->num_used_log_zones < info->gc_trigger)&&(info->run_gc)) + while (info->num_used_log_zones < info->gc_trigger && info->run_gc) continue; - - - logical_block_map *ptr = info->map[index]; - while((!ptr->page_maps)&&(info->run_gc)) { - index = (index + 1) % info->num_data_zones; - ptr = info->map[index]; + logical_block_map *ptr = info->logical_block_maps[index]; + while(!ptr->page_maps && info->run_gc) { + index = (index + 1) % info->num_data_zones; + ptr = info->logical_block_maps[index]; continue; } - - if(!info->run_gc) - break; - + if(!info->run_gc) + break; pthread_mutex_lock(&info->zones_list_lock); //Get free zone and nullify the chain zone_info *free_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; + if (info->num_free_zones == 1) + info->free_zones_list_tail = NULL; free_zone->chain = NULL; + --info->num_free_zones; pthread_mutex_unlock(&info->zones_list_lock); pthread_mutex_lock(&ptr->logical_block_lock); //Merge the logical block to data zone + //printf("Before: num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); merge(info, ptr, free_zone); + //printf("After : num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); pthread_mutex_unlock(&ptr->logical_block_lock); pthread_mutex_lock(&info->zones_list_lock); @@ -235,14 +233,13 @@ void *gc_thread(void *info_ptr) //Reset if used log zone : if valid pages is reference is zero for (zone_info *prev = NULL, *free = NULL, *tmp = info->used_log_zones_list; tmp;) { - if(tmp->num_valid_pages <= 0) { + if (tmp->num_valid_pages <= 0) { free = tmp; tmp = tmp->chain; - if (!prev) { - info->used_log_zones_list = tmp; - } else { + if (prev) prev->chain = tmp; - } + else + info->used_log_zones_list = tmp; free->chain = NULL; //reset nvme_zns_mgmt_send(info->fd, info->nsid, @@ -251,14 +248,18 @@ void *gc_thread(void *info_ptr) //Remove from used_log_zones --info->num_used_log_zones; //Append sup to free zones - zone_info *itr = info->free_zones_list; - if(itr) { - while(itr->chain) - itr = itr->chain; - itr->chain = free; + // zone_info *itr = info->free_zones_list; + if(info->free_zones_list) { + // while(itr->chain) + // itr = itr->chain; + // itr->chain = free; + info->free_zones_list_tail->chain = free; + info->free_zones_list_tail = info->free_zones_list_tail->chain; } else { - itr = free; - } + info->free_zones_list = free; + info->free_zones_list_tail = free; + } + ++info->num_free_zones; } else { prev = tmp; tmp = tmp->chain; @@ -290,15 +291,14 @@ static void check_to_change_log_zone(zns_info *info, if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zone_num_pages - 1) return; - pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; - if (!info->used_log_zones_list) { - info->used_log_zones_list = info->curr_log_zone; - } else { + if (info->used_log_zones_list) { zone_info *head = info->used_log_zones_list; - while(head->chain != NULL) + while(head->chain) head = head->chain; head->chain = info->curr_log_zone; + } else { + info->used_log_zones_list = info->curr_log_zone; } ++info->num_used_log_zones; info->curr_log_zone = NULL; @@ -307,16 +307,17 @@ static void check_to_change_log_zone(zns_info *info, while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; - while(info->curr_log_zone == NULL) { - pthread_mutex_lock(&info->zones_list_lock); - if (count(info->free_zones_list) > 1) { - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - info->curr_log_zone->chain = NULL; - info->curr_log_zone->num_valid_pages = 0; - } - pthread_mutex_unlock(&info->zones_list_lock); - usleep(100); + while (!info->curr_log_zone) { + pthread_mutex_lock(&info->zones_list_lock); + if (info->num_free_zones > 1) { + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->chain; + info->curr_log_zone->chain = NULL; + info->curr_log_zone->num_valid_pages = 0; + --info->num_free_zones; + } + pthread_mutex_unlock(&info->zones_list_lock); + // usleep(100); } } @@ -324,38 +325,38 @@ static void update_map(zns_info *info, uint32_t logical_addr, unsigned long long physical_addr) { int index = hash_function(logical_addr, info->zone_num_pages); - logical_block_map **map = info->map; + logical_block_map **maps = info->logical_block_maps; increment_zone_valid_page_counter(info->curr_log_zone); //Fill in hashmap //printf("Added to %d\n",index); //Lock for the update in log - pthread_mutex_lock(&info->map[index]->logical_block_lock); - if (map[index]->page_maps == NULL) { - map[index]->page_maps = (page_map *)calloc(1, sizeof(page_map)); - map[index]->page_maps->page_zone_info = info->curr_log_zone; - map[index]->page_maps->logical_addr = logical_addr; - map[index]->page_maps->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); + if (!maps[index]->page_maps) { + maps[index]->page_maps = (page_map *)calloc(1, sizeof(page_map)); + maps[index]->page_maps->page_zone_info = info->curr_log_zone; + maps[index]->page_maps->logical_addr = logical_addr; + maps[index]->page_maps->physical_addr = physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - if (map[index]->page_maps->logical_addr == logical_addr) { + if (maps[index]->page_maps->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(map[index]->page_maps->page_zone_info); - map[index]->page_maps->page_zone_info = info->curr_log_zone; - map[index]->page_maps->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + decrement_zone_valid_page_counter(maps[index]->page_maps->page_zone_info); + maps[index]->page_maps->page_zone_info = info->curr_log_zone; + maps[index]->page_maps->physical_addr = physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - page_map *ptr = map[index]->page_maps; + page_map *ptr = maps[index]->page_maps; while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { //Update log counter decrement_zone_valid_page_counter(ptr->next->page_zone_info); ptr->next->page_zone_info = info->curr_log_zone; ptr->next->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } ptr = ptr->next; @@ -364,23 +365,21 @@ static void update_map(zns_info *info, ptr->next->page_zone_info = info->curr_log_zone; ptr->next->logical_addr = logical_addr; ptr->next->physical_addr = physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); } static int lookup_map(zns_info *info, uint32_t logical_page_addr, unsigned long long *physical_addr) { int index = hash_function(logical_page_addr, info->zone_num_pages); - //Lock the logical block - pthread_mutex_lock(&info->map[index]->logical_block_lock); - + pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); //Search in log - page_map *head = info->map[index]->page_maps; + page_map *head = info->logical_block_maps[index]->page_maps; while (head) { if (head->logical_addr == logical_page_addr) { *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return 0; } head = head->next; @@ -388,8 +387,8 @@ static int lookup_map(zns_info *info, //If not present provide data block addr uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); - *physical_addr = info->map[index]->block_ptr->physical_zone_saddr + offset; - pthread_mutex_unlock(&info->map[index]->logical_block_lock); + *physical_addr = info->logical_block_maps[index]->block_ptr->physical_zone_saddr + offset; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return 0; } @@ -470,31 +469,36 @@ int init_ss_zns_device(struct zdev_init_params *params, (*my_dev)->tparams.zns_zone_capacity; // set log zone page mapped hashmap size to num_data_zones - info->map = (logical_block_map **)calloc(info->num_data_zones, + info->logical_block_maps = (logical_block_map **)calloc(info->num_data_zones, sizeof(logical_block_map *)); // init zones_list_lock pthread_mutex_init(&info->zones_list_lock, NULL); // set all zone index to free_zones_list info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); pthread_mutex_init(&info->free_zones_list->page_counter_lock, NULL); - zone_info *tmp = info->free_zones_list; + info->free_zones_list_tail = info->free_zones_list; for (uint32_t i = 1; i < info->zns_num_zones; ++i) { - tmp->chain = (zone_info *)calloc(1, sizeof(zone_info)); - tmp->chain->physical_zone_saddr = i * info->zone_num_pages; - pthread_mutex_init(&tmp->chain->page_counter_lock, NULL); - tmp = tmp->chain; + info->free_zones_list_tail->chain = (zone_info *)calloc(1, sizeof(zone_info)); + info->free_zones_list_tail->chain->physical_zone_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_list_tail->chain->page_counter_lock, NULL); + info->free_zones_list_tail = info->free_zones_list_tail->chain; } - + // set num_free_zones + info->num_free_zones = info->zns_num_zones; + //Set current log zone to 0th zone info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; + if (info->num_free_zones == 1) + info->free_zones_list_tail = NULL; info->curr_log_zone->chain = NULL; info->curr_log_zone->num_valid_pages = 0; + --info->num_free_zones; for (uint32_t i = 0; i < info->num_data_zones; ++i) { - info->map[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); - info->map[i]->logical_block_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->map[i]->logical_block_lock, NULL); + info->logical_block_maps[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); + info->logical_block_maps[i]->logical_block_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->logical_block_maps[i]->logical_block_lock, NULL); } //Start GC @@ -536,31 +540,27 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int deinit_ss_zns_device(struct user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; - //Kill gc info->run_gc = false; pthread_join(info->gc_thread_id, NULL); - - - logical_block_map **map = info->map; + logical_block_map **maps = info->logical_block_maps; //free hashmap for (uint32_t i = 0; i < info->num_data_zones; ++i) { - if (map[i] == NULL) - continue; - + // if (!map[i]) + // continue; //Clear all log heads for a logical block - while (map[i]->page_maps) { - page_map *tmp = map[i]->page_maps; - map[i]->page_maps = map[i]->page_maps->next; + while (maps[i]->page_maps) { + page_map *tmp = maps[i]->page_maps; + maps[i]->page_maps = maps[i]->page_maps->next; free(tmp); } - if (map[i]->block_ptr) - free(map[i]->block_ptr); - pthread_mutex_destroy(&map[i]->logical_block_lock); + if (maps[i]->block_ptr) + free(maps[i]->block_ptr); + pthread_mutex_destroy(&maps[i]->logical_block_lock); //Clear map[i] - free(map[i]); + free(maps[i]); } - free(map); + free(maps); while (info->used_log_zones_list) { zone_info *tmp = info->used_log_zones_list; From 215879650daf7c938af4c27fc0741d7b2c942b13 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 26 Sep 2022 22:41:56 +0000 Subject: [PATCH 030/101] clean code --- src/m23-ftl/zns_device.cpp | 139 ++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 80 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 3f716be..04a5b79 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -26,38 +26,33 @@ SOFTWARE. #include #include #include -#include #include #include -#include #include "zns_device.h" extern "C" { -// enum {BUF_SIZE = 128 * 4096}; -// static uint32_t used_buf_size = 0; - -//Structure for zone in zns +// Structure for zone in zns struct zone_info { uint32_t num_valid_pages; // counter unsigned long long physical_zone_saddr; pthread_mutex_t page_counter_lock; - zone_info *chain; //Chained in free_zones and used_log_zones_list + zone_info *chain; // Chained in free_zones and used_log_zones_list }; -//Structure for pagemap in log +// Structure for pagemap in log struct page_map { uint64_t logical_addr; unsigned long long physical_addr; zone_info *page_zone_info; - page_map *next; //Logpage map for each logical block + page_map *next; // page map for each logical block }; -//Structure for logical block [contains page map and block map] +// Structure for logical block [contains page map and block map] struct logical_block_map { uint64_t logical_block_saddr; - page_map *page_maps; //Log page mapping for this logical block - zone_info *block_ptr; //Point to zone_info + page_map *page_maps; // page mapping for this logical block + zone_info *block_ptr; // Point to zone_info //TODO: LOCK the access pthread_mutex_t logical_block_lock; }; @@ -66,28 +61,27 @@ struct zns_info { // Values from init parameters int num_log_zones; int gc_trigger; - char device_name; pthread_t gc_thread_id; bool run_gc; // Query the nisd for following info int fd; - unsigned nsid; + unsigned nsid; uint32_t zns_page_size; - uint32_t zone_num_pages; uint32_t zns_num_zones; + uint32_t zone_num_pages; uint32_t num_data_zones; pthread_mutex_t zones_list_lock; // Log zone maintainance int num_used_log_zones; - zone_info *used_log_zones_list; // let the new log zone at the end of the array - zone_info *curr_log_zone; // the index of used_log_zones_list, which is equal to curr_log_zone_saddr / zns_zone_capacity - - //Logical to Physical mapping page and block + zone_info *used_log_zones_list; + zone_info *curr_log_zone; + + // Logical to Physical mapping page and block logical_block_map **logical_block_maps; // Page mapped hashmap for log zone - //Free zones array + // Free zones array uint32_t num_free_zones; zone_info *free_zones_list; zone_info *free_zones_list_tail; @@ -103,14 +97,14 @@ struct zns_info { // return count; // } -void increment_zone_valid_page_counter(zone_info *log) +void increase_zone_num_valid_page(zone_info *log) { pthread_mutex_lock(&log->page_counter_lock); ++log->num_valid_pages; pthread_mutex_unlock(&log->page_counter_lock); } -void decrement_zone_valid_page_counter(zone_info *log) +void decrease_zone_num_valid_page(zone_info *log) { pthread_mutex_lock(&log->page_counter_lock); --log->num_valid_pages; @@ -121,11 +115,11 @@ static int append_to_zone(zns_info *info, unsigned long long saddr, unsigned long long *physical_addr, void *buffer, uint32_t size) { - unsigned short number_of_pages = size / info->zns_page_size - 1; //calc from size and page_size - //TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. + unsigned short number_of_pages = size / info->zns_page_size - 1; // calc from size and page_size + // TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. nvme_zns_append(info->fd, info->nsid, saddr, number_of_pages, 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); - //ss_nvme_show_status(errno); + // ss_nvme_show_status(errno); return errno; } @@ -135,7 +129,7 @@ static int read_from_nvme(zns_info *info, unsigned long long physical_addr, unsigned short number_of_pages = size / info->zns_page_size - 1; nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, 0, 0, 0, 0, 0, size, buffer, 0, NULL); - //ss_nvme_show_status(errno); + // ss_nvme_show_status(errno); return errno; } @@ -148,25 +142,25 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) if (old_used_zone) { have_data = true; page_physical_addr = old_used_zone->physical_zone_saddr + offset; - decrement_zone_valid_page_counter(old_used_zone); + decrease_zone_num_valid_page(old_used_zone); } for (page_map *head = map->page_maps; head; head = head->next) { if (head->logical_addr == map->logical_block_saddr + offset) { page_physical_addr = head->physical_addr; - decrement_zone_valid_page_counter(head->page_zone_info); + decrease_zone_num_valid_page(head->page_zone_info); have_data = true; } } char *buffer = (char *)calloc(info->zns_page_size, sizeof(char)); - //Do nvme read on paddr + // Do nvme read on paddr if (have_data) read_from_nvme(info, page_physical_addr, buffer, info->zns_page_size); - //Do nvme append new_zone->saddr + // Do nvme append new_zone->saddr unsigned long long physical_addr = 0ULL; append_to_zone(info, new_zone->physical_zone_saddr, &physical_addr, buffer, info->zns_page_size); free(buffer); - increment_zone_valid_page_counter(new_zone); + increase_zone_num_valid_page(new_zone); } while (map->page_maps) { page_map *tmp = map->page_maps; @@ -174,15 +168,12 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) free(tmp); } map->block_ptr = new_zone; + // Append old data zone to free zones list if (old_used_zone) { nvme_zns_mgmt_send(info->fd, info->nsid, old_used_zone->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); - // zone_info *head = info->free_zones_list; if (info->free_zones_list) { - // while (head->chain) - // head = head->chain; - // head->chain = old_used_zone; info->free_zones_list_tail->chain = old_used_zone; info->free_zones_list_tail = info->free_zones_list_tail->chain; } else { @@ -201,7 +192,7 @@ void *gc_thread(void *info_ptr) //Check condition while (info->num_used_log_zones < info->gc_trigger && info->run_gc) continue; - logical_block_map *ptr = info->logical_block_maps[index]; + logical_block_map *ptr = info->logical_block_maps[index]; while(!ptr->page_maps && info->run_gc) { index = (index + 1) % info->num_data_zones; ptr = info->logical_block_maps[index]; @@ -210,7 +201,7 @@ void *gc_thread(void *info_ptr) if(!info->run_gc) break; pthread_mutex_lock(&info->zones_list_lock); - //Get free zone and nullify the chain + // Get free zone and nullify the chain zone_info *free_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; if (info->num_free_zones == 1) @@ -218,19 +209,18 @@ void *gc_thread(void *info_ptr) free_zone->chain = NULL; --info->num_free_zones; pthread_mutex_unlock(&info->zones_list_lock); - + pthread_mutex_lock(&ptr->logical_block_lock); - //Merge the logical block to data zone - //printf("Before: num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); + // Merge the logical block to data zone + // printf("Before: num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); merge(info, ptr, free_zone); - //printf("After : num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); + // printf("After : num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); pthread_mutex_unlock(&ptr->logical_block_lock); pthread_mutex_lock(&info->zones_list_lock); - //Check used log zone valid counter if zero reset and add to free zone list - //Append old data zone to free zones list - //FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list - //Reset if used log zone : if valid pages is reference is zero + // Check used log zone valid counter if zero reset and add to free zone list + // FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list + // Reset if used log zone : if valid pages is reference is zero for (zone_info *prev = NULL, *free = NULL, *tmp = info->used_log_zones_list; tmp;) { if (tmp->num_valid_pages <= 0) { @@ -241,18 +231,13 @@ void *gc_thread(void *info_ptr) else info->used_log_zones_list = tmp; free->chain = NULL; - //reset + // reset nvme_zns_mgmt_send(info->fd, info->nsid, free->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); - //Remove from used_log_zones + // Remove from used_log_zones --info->num_used_log_zones; - //Append sup to free zones - // zone_info *itr = info->free_zones_list; if(info->free_zones_list) { - // while(itr->chain) - // itr = itr->chain; - // itr->chain = free; info->free_zones_list_tail->chain = free; info->free_zones_list_tail = info->free_zones_list_tail->chain; } else { @@ -276,22 +261,20 @@ static inline int hash_function(uint32_t key, uint32_t base) return key / base; } -static inline int offset_function(uint32_t key, uint32_t base) +static inline int offset_function(uint32_t key, uint32_t base) { return key % base; } - -//Change this func static void check_to_change_log_zone(zns_info *info, unsigned long long last_append_addr) { - //TODO: Add a check on no of log zone used, trigger gc if it reaches the condition - //Check if current log zone is ended, then change to next free log zone; FIXME + // TODO: Add a check on no of log zone used, trigger gc if it reaches the condition + // Check if current log zone is ended, then change to next free log zone; FIXME if (last_append_addr - info->curr_log_zone->physical_zone_saddr < info->zone_num_pages - 1) return; - pthread_mutex_lock(&info->zones_list_lock); //Lock for changing used_log_zones_list and accessing free zones list; + pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; if (info->used_log_zones_list) { zone_info *head = info->used_log_zones_list; while(head->chain) @@ -303,7 +286,7 @@ static void check_to_change_log_zone(zns_info *info, ++info->num_used_log_zones; info->curr_log_zone = NULL; pthread_mutex_unlock(&info->zones_list_lock); - + while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; @@ -317,7 +300,6 @@ static void check_to_change_log_zone(zns_info *info, --info->num_free_zones; } pthread_mutex_unlock(&info->zones_list_lock); - // usleep(100); } } @@ -326,9 +308,9 @@ static void update_map(zns_info *info, { int index = hash_function(logical_addr, info->zone_num_pages); logical_block_map **maps = info->logical_block_maps; - increment_zone_valid_page_counter(info->curr_log_zone); + increase_zone_num_valid_page(info->curr_log_zone); //Fill in hashmap - //printf("Added to %d\n",index); + //printf("Added to %d\n",index); //Lock for the update in log pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); if (!maps[index]->page_maps) { @@ -339,10 +321,10 @@ static void update_map(zns_info *info, pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - + if (maps[index]->page_maps->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(maps[index]->page_maps->page_zone_info); + decrease_zone_num_valid_page(maps[index]->page_maps->page_zone_info); maps[index]->page_maps->page_zone_info = info->curr_log_zone; maps[index]->page_maps->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); @@ -353,7 +335,7 @@ static void update_map(zns_info *info, while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { //Update log counter - decrement_zone_valid_page_counter(ptr->next->page_zone_info); + decrease_zone_num_valid_page(ptr->next->page_zone_info); ptr->next->page_zone_info = info->curr_log_zone; ptr->next->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); @@ -405,7 +387,7 @@ int init_ss_zns_device(struct zdev_init_params *params, info->num_log_zones = params->log_zones; // set gc_trigger info->gc_trigger = params->gc_wmark; - + // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -450,7 +432,7 @@ int init_ss_zns_device(struct zdev_init_params *params, return ret; } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; + info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; // set num_data_zones = zns_num_zones - num_log_zones info->num_data_zones = info->zns_num_zones - info->num_log_zones; @@ -496,7 +478,7 @@ int init_ss_zns_device(struct zdev_init_params *params, --info->num_free_zones; for (uint32_t i = 0; i < info->num_data_zones; ++i) { - info->logical_block_maps[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); + info->logical_block_maps[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); info->logical_block_maps[i]->logical_block_saddr = i * info->zone_num_pages; pthread_mutex_init(&info->logical_block_maps[i]->logical_block_lock, NULL); } @@ -516,7 +498,6 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, int ret = lookup_map(info, address / info->zns_page_size, &physical_addr); if (ret) return ret; - // if (!get) read_from_nvme(info, physical_addr, buffer, size); return errno; } @@ -530,9 +511,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, &physical_addr, buffer, size); if (ret) return ret; - //printf("Wait1\n"); update_map(info, address / info->zns_page_size, physical_addr); - //printf("Wait2\n %d",info->num_used_log_zones); check_to_change_log_zone(info, physical_addr); return 0; } @@ -540,35 +519,35 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int deinit_ss_zns_device(struct user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; - //Kill gc + // Kill gc info->run_gc = false; pthread_join(info->gc_thread_id, NULL); logical_block_map **maps = info->logical_block_maps; - //free hashmap + // free hashmap for (uint32_t i = 0; i < info->num_data_zones; ++i) { - // if (!map[i]) - // continue; - //Clear all log heads for a logical block + // Clear all log heads for a logical block while (maps[i]->page_maps) { page_map *tmp = maps[i]->page_maps; maps[i]->page_maps = maps[i]->page_maps->next; free(tmp); } - if (maps[i]->block_ptr) + if (maps[i]->block_ptr) { + pthread_mutex_destroy(&maps[i]->block_ptr->page_counter_lock); free(maps[i]->block_ptr); + } pthread_mutex_destroy(&maps[i]->logical_block_lock); - //Clear map[i] + // Clear maps[i] free(maps[i]); } free(maps); - + while (info->used_log_zones_list) { zone_info *tmp = info->used_log_zones_list; info->used_log_zones_list = info->used_log_zones_list->chain; pthread_mutex_destroy(&tmp->page_counter_lock); free(tmp); } - + while (info->free_zones_list) { zone_info *tmp = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; From b1bf80a4d75798ac3d453cadcc19c12e6ef49c39 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Thu, 29 Sep 2022 22:28:01 +0000 Subject: [PATCH 031/101] make page_maps become ordered --- src/m23-ftl/zns_device.cpp | 81 +++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 04a5b79..727b155 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -136,20 +136,25 @@ static int read_from_nvme(zns_info *info, unsigned long long physical_addr, void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { zone_info *old_used_zone = map->block_ptr; + page_map *ptr = map->page_maps; for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { unsigned long long page_physical_addr = 0ULL; bool have_data = false; + bool still_have_data = false; if (old_used_zone) { have_data = true; page_physical_addr = old_used_zone->physical_zone_saddr + offset; decrease_zone_num_valid_page(old_used_zone); + if (old_used_zone->num_valid_pages) + still_have_data = true; } - for (page_map *head = map->page_maps; head; head = head->next) { - if (head->logical_addr == map->logical_block_saddr + offset) { - page_physical_addr = head->physical_addr; - decrease_zone_num_valid_page(head->page_zone_info); + if (ptr && ptr->logical_addr == map->logical_block_saddr + offset) { have_data = true; - } + page_physical_addr = ptr->physical_addr; + decrease_zone_num_valid_page(ptr->page_zone_info); + ptr = ptr->next; + if (ptr) + still_have_data = true; } char *buffer = (char *)calloc(info->zns_page_size, sizeof(char)); // Do nvme read on paddr @@ -161,6 +166,8 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) buffer, info->zns_page_size); free(buffer); increase_zone_num_valid_page(new_zone); + if (!still_have_data) + break; } while (map->page_maps) { page_map *tmp = map->page_maps; @@ -196,6 +203,7 @@ void *gc_thread(void *info_ptr) while(!ptr->page_maps && info->run_gc) { index = (index + 1) % info->num_data_zones; ptr = info->logical_block_maps[index]; + printf("free_zones: %u, used_log_zones: %d\n", info->num_free_zones, info->num_used_log_zones); continue; } if(!info->run_gc) @@ -256,12 +264,12 @@ void *gc_thread(void *info_ptr) return NULL; } -static inline int hash_function(uint32_t key, uint32_t base) +static inline uint32_t hash_function(uint32_t key, uint32_t base) { return key / base; } -static inline int offset_function(uint32_t key, uint32_t base) +static inline uint32_t offset_function(uint32_t key, uint32_t base) { return key % base; } @@ -306,7 +314,7 @@ static void check_to_change_log_zone(zns_info *info, static void update_map(zns_info *info, uint32_t logical_addr, unsigned long long physical_addr) { - int index = hash_function(logical_addr, info->zone_num_pages); + uint32_t index = hash_function(logical_addr, info->zone_num_pages); logical_block_map **maps = info->logical_block_maps; increase_zone_num_valid_page(info->curr_log_zone); //Fill in hashmap @@ -331,6 +339,17 @@ static void update_map(zns_info *info, return; } + if (maps[index]->page_maps->logical_addr > logical_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = maps[index]->page_maps; + maps[index]->page_maps = tmp; + tmp->page_zone_info = info->curr_log_zone; + tmp->logical_addr = logical_addr; + tmp->physical_addr = physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + return; + } + page_map *ptr = maps[index]->page_maps; while (ptr->next) { if (ptr->next->logical_addr == logical_addr) { @@ -340,6 +359,15 @@ static void update_map(zns_info *info, ptr->next->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; + } else if (ptr->next->logical_addr > logical_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = ptr->next; + ptr->next = tmp; + tmp->page_zone_info = info->curr_log_zone; + tmp->logical_addr = logical_addr; + tmp->physical_addr = physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + return; } ptr = ptr->next; } @@ -353,7 +381,7 @@ static void update_map(zns_info *info, static int lookup_map(zns_info *info, uint32_t logical_page_addr, unsigned long long *physical_addr) { - int index = hash_function(logical_page_addr, info->zone_num_pages); + uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); //Lock the logical block pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); //Search in log @@ -507,12 +535,35 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, { unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; - int ret = append_to_zone(info, info->curr_log_zone->physical_zone_saddr, - &physical_addr, buffer, size); - if (ret) - return ret; - update_map(info, address / info->zns_page_size, physical_addr); - check_to_change_log_zone(info, physical_addr); + // // if can write to data zone directly + // uint32_t index = hash_function(address / info->zns_page_size, info->zone_num_pages); + // pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); + // if (info->logical_block_maps[index]->block_ptr) { + // while (info->logical_block_maps[index]->block_ptr->num_valid_pages < address / info->zns_page_size - info->logical_block_maps[index]->logical_block_saddr) { + // uint32_t size = info->zns_page_size; + // char *buffer = (char *)calloc(1, size); + // int ret = append_to_zone(info, info->logical_block_maps[index]->block_ptr->physical_zone_saddr, + // &physical_addr, buffer, size); + // free(buffer); + // if (ret) + // return ret; + // increase_zone_num_valid_page(info->logical_block_maps[index]->block_ptr); + // } + // int ret = append_to_zone(info, info->logical_block_maps[index]->block_ptr->physical_zone_saddr, + // &physical_addr, buffer, size); + // if (ret) + // return ret; + // increase_zone_num_valid_page(info->logical_block_maps[index]->block_ptr); + // pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + // } else { + // pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + int ret = append_to_zone(info, info->curr_log_zone->physical_zone_saddr, + &physical_addr, buffer, size); + if (ret) + return ret; + update_map(info, address / info->zns_page_size, physical_addr); + check_to_change_log_zone(info, physical_addr); + // } return 0; } From 42ab87d99e54e30ae02e4867b52627a9d86c3f87 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 30 Sep 2022 09:42:27 +0000 Subject: [PATCH 032/101] appending data first to data zone if the data's address can append --- src/m23-ftl/zns_device.cpp | 55 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 727b155..a2e9446 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -197,13 +197,11 @@ void *gc_thread(void *info_ptr) uint32_t index = 0; while (info->run_gc) { //Check condition - while (info->num_used_log_zones < info->gc_trigger && info->run_gc) - continue; + while (info->num_used_log_zones < info->gc_trigger && info->run_gc); logical_block_map *ptr = info->logical_block_maps[index]; while(!ptr->page_maps && info->run_gc) { index = (index + 1) % info->num_data_zones; ptr = info->logical_block_maps[index]; - printf("free_zones: %u, used_log_zones: %d\n", info->num_free_zones, info->num_used_log_zones); continue; } if(!info->run_gc) @@ -535,35 +533,40 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, { unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; - // // if can write to data zone directly - // uint32_t index = hash_function(address / info->zns_page_size, info->zone_num_pages); - // pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); - // if (info->logical_block_maps[index]->block_ptr) { - // while (info->logical_block_maps[index]->block_ptr->num_valid_pages < address / info->zns_page_size - info->logical_block_maps[index]->logical_block_saddr) { - // uint32_t size = info->zns_page_size; - // char *buffer = (char *)calloc(1, size); - // int ret = append_to_zone(info, info->logical_block_maps[index]->block_ptr->physical_zone_saddr, - // &physical_addr, buffer, size); - // free(buffer); - // if (ret) - // return ret; - // increase_zone_num_valid_page(info->logical_block_maps[index]->block_ptr); - // } - // int ret = append_to_zone(info, info->logical_block_maps[index]->block_ptr->physical_zone_saddr, - // &physical_addr, buffer, size); - // if (ret) - // return ret; - // increase_zone_num_valid_page(info->logical_block_maps[index]->block_ptr); - // pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - // } else { - // pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + uint32_t index = hash_function(address / info->zns_page_size, + info->zone_num_pages); + logical_block_map *map = info->logical_block_maps[index]; + pthread_mutex_lock(&map->logical_block_lock); + // if can write to data zone directly + if (map->block_ptr && + map->block_ptr->num_valid_pages < info->zone_num_pages) { + uint32_t offset = offset_function(address / info->zns_page_size, + info->zone_num_pages); + char *null_buffer = (char *)calloc(1, info->zns_page_size); + while (map->block_ptr->num_valid_pages < offset) { + int ret = append_to_zone(info, map->block_ptr->physical_zone_saddr, + &physical_addr, + null_buffer, info->zns_page_size); + if (ret) + return ret; + increase_zone_num_valid_page(map->block_ptr); + } + free(null_buffer); + int ret = append_to_zone(info, map->block_ptr->physical_zone_saddr, + &physical_addr, buffer, size); + if (ret) + return ret; + increase_zone_num_valid_page(map->block_ptr); + pthread_mutex_unlock(&map->logical_block_lock); + } else { + pthread_mutex_unlock(&map->logical_block_lock); int ret = append_to_zone(info, info->curr_log_zone->physical_zone_saddr, &physical_addr, buffer, size); if (ret) return ret; update_map(info, address / info->zns_page_size, physical_addr); check_to_change_log_zone(info, physical_addr); - // } + } return 0; } From 1a537e0f7cf589d8d1874cb485562e66f09130aa Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 1 Oct 2022 16:54:46 +0000 Subject: [PATCH 033/101] write multiple pages --- src/m23-ftl/zns_device.cpp | 354 +++++++++++++++++++++++-------------- 1 file changed, 220 insertions(+), 134 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a2e9446..f287c5e 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -23,10 +23,11 @@ SOFTWARE. #include #include #include -#include #include #include #include +#include +#include #include #include "zns_device.h" @@ -35,8 +36,10 @@ extern "C" { // Structure for zone in zns struct zone_info { uint32_t num_valid_pages; // counter + uint32_t write_ptr; unsigned long long physical_zone_saddr; pthread_mutex_t page_counter_lock; + pthread_mutex_t write_ptr_lock; zone_info *chain; // Chained in free_zones and used_log_zones_list }; @@ -52,7 +55,8 @@ struct page_map { struct logical_block_map { uint64_t logical_block_saddr; page_map *page_maps; // page mapping for this logical block - zone_info *block_ptr; // Point to zone_info + page_map *old_page_maps; + zone_info *block_map; // Point to zone_info //TODO: LOCK the access pthread_mutex_t logical_block_lock; }; @@ -63,7 +67,6 @@ struct zns_info { int gc_trigger; pthread_t gc_thread_id; bool run_gc; - // Query the nisd for following info int fd; unsigned nsid; @@ -71,20 +74,19 @@ struct zns_info { uint32_t zns_num_zones; uint32_t zone_num_pages; uint32_t num_data_zones; + uint32_t maximum_data_transfer_size; + uint32_t zone_append_size_limit; pthread_mutex_t zones_list_lock; - // Log zone maintainance int num_used_log_zones; zone_info *used_log_zones_list; zone_info *curr_log_zone; - - // Logical to Physical mapping page and block - logical_block_map **logical_block_maps; // Page mapped hashmap for log zone - // Free zones array uint32_t num_free_zones; zone_info *free_zones_list; zone_info *free_zones_list_tail; + // Logical to Physical mapping page and block + logical_block_map **logical_block_maps; // Page mapped hashmap for log zone }; // int count(zone_info *ptr) @@ -97,28 +99,59 @@ struct zns_info { // return count; // } -void increase_zone_num_valid_page(zone_info *log) +static inline void increase_zone_num_valid_page(zone_info *zone, + uint32_t num_pages) { - pthread_mutex_lock(&log->page_counter_lock); - ++log->num_valid_pages; - pthread_mutex_unlock(&log->page_counter_lock); + pthread_mutex_lock(&zone->page_counter_lock); + zone->num_valid_pages += num_pages; + pthread_mutex_unlock(&zone->page_counter_lock); } -void decrease_zone_num_valid_page(zone_info *log) +static inline void decrease_zone_num_valid_page(zone_info *zone, + uint32_t num_pages) { - pthread_mutex_lock(&log->page_counter_lock); - --log->num_valid_pages; - pthread_mutex_unlock(&log->page_counter_lock); + pthread_mutex_lock(&zone->page_counter_lock); + zone->num_valid_pages -= num_pages; + pthread_mutex_unlock(&zone->page_counter_lock); } -static int append_to_zone(zns_info *info, unsigned long long saddr, - unsigned long long *physical_addr, - void *buffer, uint32_t size) +static inline void increase_zone_write_ptr(zone_info *zone, + uint32_t num_pages) { - unsigned short number_of_pages = size / info->zns_page_size - 1; // calc from size and page_size - // TODO: Later make provision to include meta data containing lba and write size. For persistent log storage. - nvme_zns_append(info->fd, info->nsid, saddr, number_of_pages, - 0, 0, 0, 0, size, buffer, 0, NULL, physical_addr); + pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr += num_pages; + pthread_mutex_unlock(&zone->write_ptr_lock); +} + +static inline void decrease_zone_write_ptr(zone_info *zone, + uint32_t num_pages) +{ + pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr -= num_pages; + pthread_mutex_unlock(&zone->write_ptr_lock); +} + +static int append_to_data_zone(zns_info *info, unsigned long long saddr, + void *buffer, uint32_t size) +{ + uint32_t appended_size = 0; + while (appended_size < size) { + unsigned long long physical_addr = 0ULL; + if (info->zone_append_size_limit < size - appended_size) { + unsigned short num_pages = info->zone_append_size_limit / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, saddr, num_pages - 1, 0, 0, 0, 0, + info->zone_append_size_limit, (char *)buffer + appended_size, + 0, NULL, &physical_addr); + appended_size += info->zone_append_size_limit; + } else { + uint32_t curr_append_size = size - appended_size; + unsigned short num_pages = curr_append_size / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, saddr, num_pages - 1, 0, 0, 0, 0, + curr_append_size, (char *)buffer + appended_size, + 0, NULL, &physical_addr); + appended_size += curr_append_size; + } + } // ss_nvme_show_status(errno); return errno; } @@ -126,17 +159,23 @@ static int append_to_zone(zns_info *info, unsigned long long saddr, static int read_from_nvme(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size) { - unsigned short number_of_pages = size / info->zns_page_size - 1; - nvme_read(info->fd, info->nsid, physical_addr, number_of_pages, - 0, 0, 0, 0, 0, size, buffer, 0, NULL); + unsigned short num_pages = size / info->zns_page_size - 1; + nvme_read(info->fd, info->nsid, physical_addr, num_pages, 0, 0, 0, 0, 0, + size, buffer, 0, NULL); // ss_nvme_show_status(errno); return errno; } -void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) +static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) { - zone_info *old_used_zone = map->block_ptr; - page_map *ptr = map->page_maps; + pthread_mutex_lock(&map->logical_block_lock); + map->old_page_maps = map->page_maps; + map->page_maps = NULL; + pthread_mutex_unlock(&map->logical_block_lock); + page_map *ptr = map->old_page_maps; + zone_info *old_used_zone = map->block_map; + uint32_t zone_append_page_limit = info->zone_append_size_limit / info->zns_page_size; + char * buffer = (char *)calloc(info->zone_append_size_limit, sizeof(char)); for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { unsigned long long page_physical_addr = 0ULL; bool have_data = false; @@ -144,39 +183,48 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) if (old_used_zone) { have_data = true; page_physical_addr = old_used_zone->physical_zone_saddr + offset; - decrease_zone_num_valid_page(old_used_zone); - if (old_used_zone->num_valid_pages) + decrease_zone_write_ptr(old_used_zone, 1); + if (old_used_zone->write_ptr) still_have_data = true; } if (ptr && ptr->logical_addr == map->logical_block_saddr + offset) { - have_data = true; - page_physical_addr = ptr->physical_addr; - decrease_zone_num_valid_page(ptr->page_zone_info); - ptr = ptr->next; - if (ptr) - still_have_data = true; + have_data = true; + page_physical_addr = ptr->physical_addr; + decrease_zone_num_valid_page(ptr->page_zone_info, 1); + ptr = ptr->next; + if (ptr) + still_have_data = true; } - char *buffer = (char *)calloc(info->zns_page_size, sizeof(char)); - // Do nvme read on paddr if (have_data) - read_from_nvme(info, page_physical_addr, buffer, info->zns_page_size); - // Do nvme append new_zone->saddr - unsigned long long physical_addr = 0ULL; - append_to_zone(info, new_zone->physical_zone_saddr, &physical_addr, - buffer, info->zns_page_size); - free(buffer); - increase_zone_num_valid_page(new_zone); - if (!still_have_data) + read_from_nvme(info, page_physical_addr, + buffer + (offset % zone_append_page_limit) * info->zns_page_size, + info->zns_page_size); + if (!still_have_data) { + append_to_data_zone(info, new_zone->physical_zone_saddr, buffer, + (offset % zone_append_page_limit + 1) * info->zns_page_size); + increase_zone_write_ptr(new_zone, offset % zone_append_page_limit + 1); break; + } + if (offset % zone_append_page_limit == zone_append_page_limit - 1) { + append_to_data_zone(info, new_zone->physical_zone_saddr, + buffer, info->zone_append_size_limit); + increase_zone_write_ptr(new_zone, zone_append_page_limit); + memset(buffer, 0, info->zone_append_size_limit); + } } - while (map->page_maps) { - page_map *tmp = map->page_maps; - map->page_maps = map->page_maps->next; + free(buffer); + pthread_mutex_lock(&map->logical_block_lock); + while (map->old_page_maps) { + page_map *tmp = map->old_page_maps; + map->old_page_maps = map->old_page_maps->next; free(tmp); } - map->block_ptr = new_zone; + map->block_map = new_zone; + pthread_mutex_unlock(&map->logical_block_lock); // Append old data zone to free zones list + pthread_mutex_lock(&info->zones_list_lock); if (old_used_zone) { + decrease_zone_num_valid_page(old_used_zone, old_used_zone->num_valid_pages); nvme_zns_mgmt_send(info->fd, info->nsid, old_used_zone->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); @@ -189,23 +237,26 @@ void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) } ++info->num_free_zones; } + pthread_mutex_unlock(&info->zones_list_lock); } -void *gc_thread(void *info_ptr) +static void *gc_thread(void *info_ptr) { zns_info *info = (zns_info *)info_ptr; uint32_t index = 0; while (info->run_gc) { //Check condition - while (info->num_used_log_zones < info->gc_trigger && info->run_gc); + while (info->num_log_zones - info->num_used_log_zones > info->gc_trigger) { + if (!info->run_gc) + return NULL; + } logical_block_map *ptr = info->logical_block_maps[index]; - while(!ptr->page_maps && info->run_gc) { + while(!ptr->page_maps) { index = (index + 1) % info->num_data_zones; ptr = info->logical_block_maps[index]; - continue; + if (!info->run_gc) + return NULL; } - if(!info->run_gc) - break; pthread_mutex_lock(&info->zones_list_lock); // Get free zone and nullify the chain zone_info *free_zone = info->free_zones_list; @@ -215,21 +266,15 @@ void *gc_thread(void *info_ptr) free_zone->chain = NULL; --info->num_free_zones; pthread_mutex_unlock(&info->zones_list_lock); - - pthread_mutex_lock(&ptr->logical_block_lock); // Merge the logical block to data zone - // printf("Before: num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); merge(info, ptr, free_zone); - // printf("After : num_log_zone: %d, num_free_zone: %d\n", info->num_used_log_zones, count(info->free_zones_list)); - pthread_mutex_unlock(&ptr->logical_block_lock); - - pthread_mutex_lock(&info->zones_list_lock); // Check used log zone valid counter if zero reset and add to free zone list // FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list // Reset if used log zone : if valid pages is reference is zero for (zone_info *prev = NULL, *free = NULL, *tmp = info->used_log_zones_list; tmp;) { - if (tmp->num_valid_pages <= 0) { + if (tmp->num_valid_pages == 0) { + pthread_mutex_lock(&info->zones_list_lock); free = tmp; tmp = tmp->chain; if (prev) @@ -238,6 +283,7 @@ void *gc_thread(void *info_ptr) info->used_log_zones_list = tmp; free->chain = NULL; // reset + decrease_zone_write_ptr(free, free->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, free->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); @@ -251,12 +297,12 @@ void *gc_thread(void *info_ptr) info->free_zones_list_tail = free; } ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_list_lock); } else { prev = tmp; tmp = tmp->chain; } } - pthread_mutex_unlock(&info->zones_list_lock); index = (index + 1) % info->num_data_zones; } return NULL; @@ -272,14 +318,10 @@ static inline uint32_t offset_function(uint32_t key, uint32_t base) return key % base; } -static void check_to_change_log_zone(zns_info *info, - unsigned long long last_append_addr) +static void change_log_zone(zns_info *info) { // TODO: Add a check on no of log zone used, trigger gc if it reaches the condition // Check if current log zone is ended, then change to next free log zone; FIXME - if (last_append_addr - info->curr_log_zone->physical_zone_saddr < - info->zone_num_pages - 1) - return; pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; if (info->used_log_zones_list) { zone_info *head = info->used_log_zones_list; @@ -292,9 +334,7 @@ static void check_to_change_log_zone(zns_info *info, ++info->num_used_log_zones; info->curr_log_zone = NULL; pthread_mutex_unlock(&info->zones_list_lock); - while (info->num_used_log_zones == info->num_log_zones); - //Dequeue from free_zone to curr_log_zone; while (!info->curr_log_zone) { pthread_mutex_lock(&info->zones_list_lock); @@ -310,11 +350,12 @@ static void check_to_change_log_zone(zns_info *info, } static void update_map(zns_info *info, - uint32_t logical_addr, unsigned long long physical_addr) + uint32_t logical_page_addr, unsigned long long physical_addr) { - uint32_t index = hash_function(logical_addr, info->zone_num_pages); + uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); logical_block_map **maps = info->logical_block_maps; - increase_zone_num_valid_page(info->curr_log_zone); + increase_zone_num_valid_page(info->curr_log_zone, 1); + increase_zone_write_ptr(info->curr_log_zone, 1); //Fill in hashmap //printf("Added to %d\n",index); //Lock for the update in log @@ -322,47 +363,44 @@ static void update_map(zns_info *info, if (!maps[index]->page_maps) { maps[index]->page_maps = (page_map *)calloc(1, sizeof(page_map)); maps[index]->page_maps->page_zone_info = info->curr_log_zone; - maps[index]->page_maps->logical_addr = logical_addr; + maps[index]->page_maps->logical_addr = logical_page_addr; maps[index]->page_maps->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - - if (maps[index]->page_maps->logical_addr == logical_addr) { + if (maps[index]->page_maps->logical_addr == logical_page_addr) { //Update log counter - decrease_zone_num_valid_page(maps[index]->page_maps->page_zone_info); + decrease_zone_num_valid_page(maps[index]->page_maps->page_zone_info, 1); maps[index]->page_maps->page_zone_info = info->curr_log_zone; maps[index]->page_maps->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - - if (maps[index]->page_maps->logical_addr > logical_addr) { + if (maps[index]->page_maps->logical_addr > logical_page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = maps[index]->page_maps; maps[index]->page_maps = tmp; tmp->page_zone_info = info->curr_log_zone; - tmp->logical_addr = logical_addr; + tmp->logical_addr = logical_page_addr; tmp->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; } - page_map *ptr = maps[index]->page_maps; while (ptr->next) { - if (ptr->next->logical_addr == logical_addr) { + if (ptr->next->logical_addr == logical_page_addr) { //Update log counter - decrease_zone_num_valid_page(ptr->next->page_zone_info); + decrease_zone_num_valid_page(ptr->next->page_zone_info, 1); ptr->next->page_zone_info = info->curr_log_zone; ptr->next->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; - } else if (ptr->next->logical_addr > logical_addr) { + } else if (ptr->next->logical_addr > logical_page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = ptr->next; ptr->next = tmp; tmp->page_zone_info = info->curr_log_zone; - tmp->logical_addr = logical_addr; + tmp->logical_addr = logical_page_addr; tmp->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return; @@ -371,7 +409,7 @@ static void update_map(zns_info *info, } ptr->next = (page_map *)calloc(1, sizeof(page_map)); ptr->next->page_zone_info = info->curr_log_zone; - ptr->next->logical_addr = logical_addr; + ptr->next->logical_addr = logical_page_addr; ptr->next->physical_addr = physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); } @@ -383,24 +421,75 @@ static int lookup_map(zns_info *info, //Lock the logical block pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); //Search in log - page_map *head = info->logical_block_maps[index]->page_maps; - while (head) { + for (page_map *head = info->logical_block_maps[index]->page_maps; head; head = head->next) { + if (head->logical_addr == logical_page_addr) { + *physical_addr = head->physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); + return 0; + } + } + for (page_map *head = info->logical_block_maps[index]->old_page_maps; head; head = head->next) { if (head->logical_addr == logical_page_addr) { *physical_addr = head->physical_addr; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); return 0; } - head = head->next; } - //If not present provide data block addr uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); - *physical_addr = info->logical_block_maps[index]->block_ptr->physical_zone_saddr + offset; + *physical_addr = info->logical_block_maps[index]->block_map->physical_zone_saddr + offset; pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return 0; } +static int append_to_log_zone(zns_info *info, uint64_t logical_addr, + void *buffer, uint32_t size) +{ + uint64_t logical_page_addr = logical_addr / info->zns_page_size; + uint32_t appended_size = 0; + while (appended_size < size) { + unsigned long long physical_addr = 0ULL; + uint32_t num_zone_remain_pages = info->zone_num_pages - + info->curr_log_zone->write_ptr; + uint32_t num_zone_remain_size = num_zone_remain_pages * info->zns_page_size; + if (num_zone_remain_size <= info->zone_append_size_limit && + num_zone_remain_size <= size - appended_size) { + nvme_zns_append(info->fd, info->nsid, + info->curr_log_zone->physical_zone_saddr, num_zone_remain_pages - 1, + 0, 0, 0, 0, num_zone_remain_size, (char *)buffer + appended_size, + 0, NULL, &physical_addr); + if (errno) + return errno; + for (uint32_t i = 0; i < num_zone_remain_pages; ++i, ++logical_page_addr, ++physical_addr) + update_map(info, logical_page_addr, physical_addr); + change_log_zone(info); + appended_size += num_zone_remain_size; + } else if (info->zone_append_size_limit < size - appended_size) { + unsigned short num_pages = info->zone_append_size_limit / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, + num_pages - 1, 0, 0, 0, 0, info->zone_append_size_limit, + (char *)buffer + appended_size, 0, NULL, &physical_addr); + if (errno) + return errno; + for (uint32_t i = 0; i < num_pages; ++i, ++logical_page_addr, ++physical_addr) + update_map(info, logical_page_addr, physical_addr); + appended_size += info->zone_append_size_limit; + } else { + unsigned short num_pages = (size - appended_size) / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, + num_pages - 1, 0, 0, 0, 0, size - appended_size, + (char *)buffer + appended_size, 0, NULL, &physical_addr); + if (errno) + return errno; + for (uint32_t i = 0; i < num_pages; ++i, ++logical_page_addr, ++physical_addr) + update_map(info, logical_page_addr, physical_addr); + appended_size += size - appended_size; + } + } + // ss_nvme_show_status(errno); + return errno; +} + int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { @@ -408,19 +497,16 @@ int init_ss_zns_device(struct zdev_init_params *params, *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; - // set num_log_zones info->num_log_zones = params->log_zones; // set gc_trigger info->gc_trigger = params->gc_wmark; - // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { printf("Dev %s opened failed %d\n", params->name, info->fd); return errno; } - // set nsid int ret = nvme_get_nsid(info->fd, &info->nsid); if (ret) { @@ -436,7 +522,6 @@ int init_ss_zns_device(struct zdev_init_params *params, return ret; } } - // set zns_lba_size(or)zns_page_size : Its same for now! nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); @@ -447,7 +532,6 @@ int init_ss_zns_device(struct zdev_init_params *params, (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; info->zns_page_size = (*my_dev)->tparams.zns_lba_size; - // set zns_num_zones nvme_zone_report zns_report; ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, @@ -459,23 +543,29 @@ int init_ss_zns_device(struct zdev_init_params *params, } (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; - // set num_data_zones = zns_num_zones - num_log_zones info->num_data_zones = info->zns_num_zones - info->num_log_zones; - // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; - // set zns_zone_capacity = #page_per_zone * zone_size (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * (*my_dev)->tparams.zns_lba_size; - // set user capacity bytes = #data_zones * zone_capacity (*my_dev)->capacity_bytes = (info->num_data_zones) * (*my_dev)->tparams.zns_zone_capacity; - + // set maximum_data_transfer_size + struct nvme_id_ctrl ctrl; + nvme_identify_ctrl(info->fd, &ctrl); + void *regs = mmap(NULL, getpagesize(), PROT_READ,MAP_SHARED, info->fd, 0); + info->maximum_data_transfer_size = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * + (*my_dev)->lba_size_bytes; + // set zone_append_size_limit + struct nvme_zns_id_ctrl id; + nvme_zns_identify_ctrl(info->fd, &id); + info->zone_append_size_limit = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) * + (*my_dev)->lba_size_bytes; // set log zone page mapped hashmap size to num_data_zones info->logical_block_maps = (logical_block_map **)calloc(info->num_data_zones, sizeof(logical_block_map *)); @@ -487,13 +577,13 @@ int init_ss_zns_device(struct zdev_init_params *params, info->free_zones_list_tail = info->free_zones_list; for (uint32_t i = 1; i < info->zns_num_zones; ++i) { info->free_zones_list_tail->chain = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail->chain->physical_zone_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_list_tail->chain->page_counter_lock, NULL); info->free_zones_list_tail = info->free_zones_list_tail->chain; + info->free_zones_list_tail->physical_zone_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_list_tail->page_counter_lock, NULL); + pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); } // set num_free_zones info->num_free_zones = info->zns_num_zones; - //Set current log zone to 0th zone info->curr_log_zone = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; @@ -502,13 +592,11 @@ int init_ss_zns_device(struct zdev_init_params *params, info->curr_log_zone->chain = NULL; info->curr_log_zone->num_valid_pages = 0; --info->num_free_zones; - for (uint32_t i = 0; i < info->num_data_zones; ++i) { info->logical_block_maps[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); info->logical_block_maps[i]->logical_block_saddr = i * info->zone_num_pages; pthread_mutex_init(&info->logical_block_maps[i]->logical_block_lock, NULL); } - //Start GC info->run_gc = true; pthread_create(&info->gc_thread_id, NULL, &gc_thread, (void *)info); @@ -521,51 +609,50 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous + pthread_mutex_lock(&info->zones_list_lock); int ret = lookup_map(info, address / info->zns_page_size, &physical_addr); if (ret) return ret; read_from_nvme(info, physical_addr, buffer, size); + pthread_mutex_unlock(&info->zones_list_lock); return errno; } int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { - unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; uint32_t index = hash_function(address / info->zns_page_size, info->zone_num_pages); logical_block_map *map = info->logical_block_maps[index]; pthread_mutex_lock(&map->logical_block_lock); // if can write to data zone directly - if (map->block_ptr && - map->block_ptr->num_valid_pages < info->zone_num_pages) { + if (!map->old_page_maps && map->block_map && + map->block_map->write_ptr < info->zone_num_pages) { uint32_t offset = offset_function(address / info->zns_page_size, info->zone_num_pages); - char *null_buffer = (char *)calloc(1, info->zns_page_size); - while (map->block_ptr->num_valid_pages < offset) { - int ret = append_to_zone(info, map->block_ptr->physical_zone_saddr, - &physical_addr, - null_buffer, info->zns_page_size); - if (ret) - return ret; - increase_zone_num_valid_page(map->block_ptr); - } + // append null data until arrive offset + uint32_t null_size = (offset - map->block_map->num_valid_pages) * info->zns_page_size; + char *null_buffer = (char *)calloc(null_size, sizeof(char)); + int ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, + null_buffer, null_size); + if (ret) + return ret; free(null_buffer); - int ret = append_to_zone(info, map->block_ptr->physical_zone_saddr, - &physical_addr, buffer, size); + increase_zone_write_ptr(map->block_map, null_size / info->zns_page_size); + // append data + ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, + buffer, size); if (ret) return ret; - increase_zone_num_valid_page(map->block_ptr); + increase_zone_write_ptr(map->block_map, size / info->zns_page_size); pthread_mutex_unlock(&map->logical_block_lock); } else { pthread_mutex_unlock(&map->logical_block_lock); - int ret = append_to_zone(info, info->curr_log_zone->physical_zone_saddr, - &physical_addr, buffer, size); + int ret = append_to_log_zone(info, address, + buffer, size); if (ret) return ret; - update_map(info, address / info->zns_page_size, physical_addr); - check_to_change_log_zone(info, physical_addr); } return 0; } @@ -585,23 +672,22 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) maps[i]->page_maps = maps[i]->page_maps->next; free(tmp); } - if (maps[i]->block_ptr) { - pthread_mutex_destroy(&maps[i]->block_ptr->page_counter_lock); - free(maps[i]->block_ptr); + if (maps[i]->block_map) { + pthread_mutex_destroy(&maps[i]->block_map->page_counter_lock); + free(maps[i]->block_map); } pthread_mutex_destroy(&maps[i]->logical_block_lock); // Clear maps[i] free(maps[i]); } free(maps); - while (info->used_log_zones_list) { zone_info *tmp = info->used_log_zones_list; info->used_log_zones_list = info->used_log_zones_list->chain; pthread_mutex_destroy(&tmp->page_counter_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } - while (info->free_zones_list) { zone_info *tmp = info->free_zones_list; info->free_zones_list = info->free_zones_list->chain; From 2d356ed8f0a53b8a9ab194b8e2e90e6b13567e47 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 1 Oct 2022 17:18:54 +0000 Subject: [PATCH 034/101] reduce zasl --- src/m23-ftl/zns_device.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index f287c5e..a3eb109 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -74,7 +74,7 @@ struct zns_info { uint32_t zns_num_zones; uint32_t zone_num_pages; uint32_t num_data_zones; - uint32_t maximum_data_transfer_size; + uint32_t max_data_transfer_size; uint32_t zone_append_size_limit; pthread_mutex_t zones_list_lock; // Log zone maintainance @@ -555,16 +555,16 @@ int init_ss_zns_device(struct zdev_init_params *params, // set user capacity bytes = #data_zones * zone_capacity (*my_dev)->capacity_bytes = (info->num_data_zones) * (*my_dev)->tparams.zns_zone_capacity; - // set maximum_data_transfer_size + // set max_data_transfer_size struct nvme_id_ctrl ctrl; nvme_identify_ctrl(info->fd, &ctrl); void *regs = mmap(NULL, getpagesize(), PROT_READ,MAP_SHARED, info->fd, 0); - info->maximum_data_transfer_size = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * + info->max_data_transfer_size = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * (*my_dev)->lba_size_bytes; // set zone_append_size_limit struct nvme_zns_id_ctrl id; nvme_zns_identify_ctrl(info->fd, &id); - info->zone_append_size_limit = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) * + info->zone_append_size_limit = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * (*my_dev)->lba_size_bytes; // set log zone page mapped hashmap size to num_data_zones info->logical_block_maps = (logical_block_map **)calloc(info->num_data_zones, From 498e5dc4afa412938fb35a4d470f01a3a5f1ffe2 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 2 Oct 2022 11:24:46 +0000 Subject: [PATCH 035/101] user can read data size of which is multiple lba --- src/m23-ftl/zns_device.cpp | 575 +++++++++++++++++++------------------ 1 file changed, 299 insertions(+), 276 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a3eb109..674be9a 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -35,12 +35,12 @@ extern "C" { // Structure for zone in zns struct zone_info { + unsigned long long physical_zone_saddr; uint32_t num_valid_pages; // counter uint32_t write_ptr; - unsigned long long physical_zone_saddr; pthread_mutex_t page_counter_lock; pthread_mutex_t write_ptr_lock; - zone_info *chain; // Chained in free_zones and used_log_zones_list + zone_info *next; // linked in free_zones and used_log_zones_list }; // Structure for pagemap in log @@ -78,23 +78,48 @@ struct zns_info { uint32_t zone_append_size_limit; pthread_mutex_t zones_list_lock; // Log zone maintainance + zone_info *curr_log_zone; int num_used_log_zones; zone_info *used_log_zones_list; - zone_info *curr_log_zone; + zone_info *used_log_zones_list_tail; // Free zones array uint32_t num_free_zones; zone_info *free_zones_list; zone_info *free_zones_list_tail; // Logical to Physical mapping page and block - logical_block_map **logical_block_maps; // Page mapped hashmap for log zone + logical_block_map *logical_block_maps; // Page mapped hashmap for log zone }; +static inline void increase_zone_num_valid_page(zone_info *zone, + uint32_t num_pages); +static inline void decrease_zone_num_valid_page(zone_info *zone, + uint32_t num_pages); +static inline void increase_zone_write_ptr(zone_info *zone, + uint32_t num_pages); +static inline void decrease_zone_write_ptr(zone_info *zone, + uint32_t num_pages); +static inline uint32_t hash_function(uint32_t key, uint32_t base); +static inline uint32_t offset_function(uint32_t key, uint32_t base); +static void change_log_zone(zns_info *info); +static int lookup_map(zns_info *info, + uint32_t logical_page_addr, unsigned long long *physical_addr); +static void update_map(zns_info *info, + uint32_t logical_page_addr, unsigned long long physical_addr); +static int read_from_nvme(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size); +static int append_to_data_zone(zns_info *info, unsigned long long saddr, + void *buffer, uint32_t size); +static int append_to_log_zone(zns_info *info, uint64_t logical_addr, + void *buffer, uint32_t size); +static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone); +static void *garbage_collection(void *info_ptr); + // int count(zone_info *ptr) // { // int count = 0; // while (ptr) { // ++count; -// ptr = ptr->chain; +// ptr = ptr->next; // } // return count; // } @@ -131,37 +156,201 @@ static inline void decrease_zone_write_ptr(zone_info *zone, pthread_mutex_unlock(&zone->write_ptr_lock); } +static inline uint32_t hash_function(uint32_t key, uint32_t base) +{ + return key / base; +} + +static inline uint32_t offset_function(uint32_t key, uint32_t base) +{ + return key % base; +} + +static void change_log_zone(zns_info *info) +{ + // TODO: Add a check on no of log zone used, trigger gc if it reaches the condition + // Check if current log zone is ended, then change to next free log zone; FIXME + pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; + if (info->used_log_zones_list) + info->used_log_zones_list_tail->next = info->curr_log_zone; + else + info->used_log_zones_list = info->curr_log_zone; + info->used_log_zones_list_tail = info->curr_log_zone; + ++info->num_used_log_zones; + info->curr_log_zone = NULL; + pthread_mutex_unlock(&info->zones_list_lock); + while (info->num_used_log_zones == info->num_log_zones); + //Dequeue from free_zone to curr_log_zone; + while (!info->curr_log_zone) { + pthread_mutex_lock(&info->zones_list_lock); + if (info->num_free_zones > 1) { + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->next; + info->curr_log_zone->next = NULL; + --info->num_free_zones; + } + pthread_mutex_unlock(&info->zones_list_lock); + } +} + +static int lookup_map(zns_info *info, + uint32_t logical_page_addr, unsigned long long *physical_addr) +{ + uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); + //Lock the logical block + pthread_mutex_lock(&info->logical_block_maps[index].logical_block_lock); + //Search in log + for (page_map *head = info->logical_block_maps[index].page_maps; head; head = head->next) { + if (head->logical_addr > logical_page_addr) + break; + if (head->logical_addr == logical_page_addr) { + *physical_addr = head->physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); + return 0; + } + } + for (page_map *head = info->logical_block_maps[index].old_page_maps; head; head = head->next) { + if (head->logical_addr > logical_page_addr) + break; + if (head->logical_addr == logical_page_addr) { + *physical_addr = head->physical_addr; + pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); + return 0; + } + } + //If not present provide data block addr + uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); + *physical_addr = info->logical_block_maps[index].block_map->physical_zone_saddr + offset; + pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); + return 0; +} + +static void update_map(zns_info *info, + uint32_t logical_page_addr, unsigned long long physical_addr) +{ + uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); + logical_block_map *maps = info->logical_block_maps; + increase_zone_num_valid_page(info->curr_log_zone, 1); + increase_zone_write_ptr(info->curr_log_zone, 1); + //Fill in hashmap + //printf("Added to %d\n",index); + //Lock for the update in log + pthread_mutex_lock(&maps[index].logical_block_lock); + if (!maps[index].page_maps) { + maps[index].page_maps = (page_map *)calloc(1, sizeof(page_map)); + maps[index].page_maps->logical_addr = logical_page_addr; + maps[index].page_maps->physical_addr = physical_addr; + maps[index].page_maps->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); + return; + } + if (maps[index].page_maps->logical_addr == logical_page_addr) { + //Update log counter + decrease_zone_num_valid_page(maps[index].page_maps->page_zone_info, 1); + maps[index].page_maps->physical_addr = physical_addr; + maps[index].page_maps->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); + return; + } + if (maps[index].page_maps->logical_addr > logical_page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = maps[index].page_maps; + maps[index].page_maps = tmp; + tmp->logical_addr = logical_page_addr; + tmp->physical_addr = physical_addr; + tmp->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); + return; + } + page_map *ptr = maps[index].page_maps; + while (ptr->next) { + if (ptr->next->logical_addr == logical_page_addr) { + //Update log counter + decrease_zone_num_valid_page(ptr->next->page_zone_info, 1); + ptr->next->physical_addr = physical_addr; + ptr->next->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); + return; + } else if (ptr->next->logical_addr > logical_page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = ptr->next; + ptr->next = tmp; + tmp->logical_addr = logical_page_addr; + tmp->physical_addr = physical_addr; + tmp->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); + return; + } + ptr = ptr->next; + } + ptr->next = (page_map *)calloc(1, sizeof(page_map)); + ptr->next->logical_addr = logical_page_addr; + ptr->next->physical_addr = physical_addr; + ptr->next->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&maps[index].logical_block_lock); +} + +static int read_from_nvme(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size) +{ + unsigned short num_pages = size / info->zns_page_size - 1; + nvme_read(info->fd, info->nsid, physical_addr, num_pages, 0, 0, 0, 0, 0, + size, buffer, 0, NULL); + // ss_nvme_show_status(errno); + return errno; +} + static int append_to_data_zone(zns_info *info, unsigned long long saddr, void *buffer, uint32_t size) { uint32_t appended_size = 0; while (appended_size < size) { unsigned long long physical_addr = 0ULL; - if (info->zone_append_size_limit < size - appended_size) { - unsigned short num_pages = info->zone_append_size_limit / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, saddr, num_pages - 1, 0, 0, 0, 0, - info->zone_append_size_limit, (char *)buffer + appended_size, - 0, NULL, &physical_addr); - appended_size += info->zone_append_size_limit; - } else { - uint32_t curr_append_size = size - appended_size; - unsigned short num_pages = curr_append_size / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, saddr, num_pages - 1, 0, 0, 0, 0, - curr_append_size, (char *)buffer + appended_size, - 0, NULL, &physical_addr); - appended_size += curr_append_size; - } + uint32_t curr_append_size = info->zone_append_size_limit; + if (curr_append_size > size - appended_size) + curr_append_size = size - appended_size; + unsigned short num_curr_append_pages = curr_append_size / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, saddr, num_curr_append_pages - 1, 0, 0, 0, 0, + curr_append_size, (char *)buffer + appended_size, + 0, NULL, &physical_addr); + if (errno) + return errno; + appended_size += curr_append_size; } // ss_nvme_show_status(errno); return errno; } -static int read_from_nvme(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size) +static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, + void *buffer, uint32_t size) { - unsigned short num_pages = size / info->zns_page_size - 1; - nvme_read(info->fd, info->nsid, physical_addr, num_pages, 0, 0, 0, 0, 0, - size, buffer, 0, NULL); + uint32_t appended_size = 0; + while (appended_size < size) { + unsigned long long physical_addr = 0ULL; + bool need_to_change_log_zone = true; + uint32_t curr_append_size = (info->zone_num_pages - info->curr_log_zone->write_ptr) * + info->zns_page_size; + if (curr_append_size > info->zone_append_size_limit) { + curr_append_size = info->zone_append_size_limit; + need_to_change_log_zone = false; + } + if (curr_append_size > size - appended_size) { + curr_append_size = size - appended_size; + need_to_change_log_zone = false; + } + unsigned short num_curr_append_pages = curr_append_size / info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, + info->curr_log_zone->physical_zone_saddr, num_curr_append_pages - 1, + 0, 0, 0, 0, curr_append_size, (char *)buffer + appended_size, + 0, NULL, &physical_addr); + if (errno) + return errno; + for (uint32_t i = 0; i < num_curr_append_pages; ++i, ++logical_page_addr, ++physical_addr) + update_map(info, logical_page_addr, physical_addr); + if (need_to_change_log_zone) + change_log_zone(info); + appended_size += curr_append_size; + } // ss_nvme_show_status(errno); return errno; } @@ -224,23 +413,21 @@ static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) // Append old data zone to free zones list pthread_mutex_lock(&info->zones_list_lock); if (old_used_zone) { - decrease_zone_num_valid_page(old_used_zone, old_used_zone->num_valid_pages); + decrease_zone_write_ptr(old_used_zone, old_used_zone->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, old_used_zone->physical_zone_saddr, false, NVME_ZNS_ZSA_RESET, 0, NULL); - if (info->free_zones_list) { - info->free_zones_list_tail->chain = old_used_zone; - info->free_zones_list_tail = info->free_zones_list_tail->chain; - } else { + if (info->free_zones_list) + info->free_zones_list_tail->next = old_used_zone; + else info->free_zones_list = old_used_zone; - info->free_zones_list_tail = old_used_zone; - } + info->free_zones_list_tail = old_used_zone; ++info->num_free_zones; } pthread_mutex_unlock(&info->zones_list_lock); } -static void *gc_thread(void *info_ptr) +static void *garbage_collection(void *info_ptr) { zns_info *info = (zns_info *)info_ptr; uint32_t index = 0; @@ -250,38 +437,45 @@ static void *gc_thread(void *info_ptr) if (!info->run_gc) return NULL; } - logical_block_map *ptr = info->logical_block_maps[index]; + logical_block_map *ptr = &info->logical_block_maps[index]; while(!ptr->page_maps) { index = (index + 1) % info->num_data_zones; - ptr = info->logical_block_maps[index]; + ptr = &info->logical_block_maps[index]; if (!info->run_gc) return NULL; } pthread_mutex_lock(&info->zones_list_lock); - // Get free zone and nullify the chain + // Get free zone and nullify the next zone_info *free_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - if (info->num_free_zones == 1) + info->free_zones_list = info->free_zones_list->next; + if (!info->free_zones_list) info->free_zones_list_tail = NULL; - free_zone->chain = NULL; + free_zone->next = NULL; --info->num_free_zones; pthread_mutex_unlock(&info->zones_list_lock); + if (!info->run_gc) + return NULL; // Merge the logical block to data zone merge(info, ptr, free_zone); // Check used log zone valid counter if zero reset and add to free zone list // FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list // Reset if used log zone : if valid pages is reference is zero + if (!info->run_gc) + return NULL; + pthread_mutex_lock(&info->zones_list_lock); for (zone_info *prev = NULL, *free = NULL, - *tmp = info->used_log_zones_list; tmp;) { - if (tmp->num_valid_pages == 0) { - pthread_mutex_lock(&info->zones_list_lock); + *tmp = info->used_log_zones_list; info->run_gc && tmp;) { + if (!tmp->num_valid_pages) { free = tmp; - tmp = tmp->chain; - if (prev) - prev->chain = tmp; - else + tmp = tmp->next; + if (prev) { + prev->next = tmp; + } else { info->used_log_zones_list = tmp; - free->chain = NULL; + if (!tmp) + info->used_log_zones_list_tail = tmp; + } + free->next = NULL; // reset decrease_zone_write_ptr(free, free->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, @@ -289,207 +483,23 @@ static void *gc_thread(void *info_ptr) NVME_ZNS_ZSA_RESET, 0, NULL); // Remove from used_log_zones --info->num_used_log_zones; - if(info->free_zones_list) { - info->free_zones_list_tail->chain = free; - info->free_zones_list_tail = info->free_zones_list_tail->chain; - } else { + if (info->free_zones_list) + info->free_zones_list_tail->next = free; + else info->free_zones_list = free; - info->free_zones_list_tail = free; - } + info->free_zones_list_tail = free; ++info->num_free_zones; - pthread_mutex_unlock(&info->zones_list_lock); } else { prev = tmp; - tmp = tmp->chain; + tmp = tmp->next; } } + pthread_mutex_unlock(&info->zones_list_lock); index = (index + 1) % info->num_data_zones; } return NULL; } -static inline uint32_t hash_function(uint32_t key, uint32_t base) -{ - return key / base; -} - -static inline uint32_t offset_function(uint32_t key, uint32_t base) -{ - return key % base; -} - -static void change_log_zone(zns_info *info) -{ - // TODO: Add a check on no of log zone used, trigger gc if it reaches the condition - // Check if current log zone is ended, then change to next free log zone; FIXME - pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; - if (info->used_log_zones_list) { - zone_info *head = info->used_log_zones_list; - while(head->chain) - head = head->chain; - head->chain = info->curr_log_zone; - } else { - info->used_log_zones_list = info->curr_log_zone; - } - ++info->num_used_log_zones; - info->curr_log_zone = NULL; - pthread_mutex_unlock(&info->zones_list_lock); - while (info->num_used_log_zones == info->num_log_zones); - //Dequeue from free_zone to curr_log_zone; - while (!info->curr_log_zone) { - pthread_mutex_lock(&info->zones_list_lock); - if (info->num_free_zones > 1) { - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - info->curr_log_zone->chain = NULL; - info->curr_log_zone->num_valid_pages = 0; - --info->num_free_zones; - } - pthread_mutex_unlock(&info->zones_list_lock); - } -} - -static void update_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long physical_addr) -{ - uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); - logical_block_map **maps = info->logical_block_maps; - increase_zone_num_valid_page(info->curr_log_zone, 1); - increase_zone_write_ptr(info->curr_log_zone, 1); - //Fill in hashmap - //printf("Added to %d\n",index); - //Lock for the update in log - pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); - if (!maps[index]->page_maps) { - maps[index]->page_maps = (page_map *)calloc(1, sizeof(page_map)); - maps[index]->page_maps->page_zone_info = info->curr_log_zone; - maps[index]->page_maps->logical_addr = logical_page_addr; - maps[index]->page_maps->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return; - } - if (maps[index]->page_maps->logical_addr == logical_page_addr) { - //Update log counter - decrease_zone_num_valid_page(maps[index]->page_maps->page_zone_info, 1); - maps[index]->page_maps->page_zone_info = info->curr_log_zone; - maps[index]->page_maps->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return; - } - if (maps[index]->page_maps->logical_addr > logical_page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); - tmp->next = maps[index]->page_maps; - maps[index]->page_maps = tmp; - tmp->page_zone_info = info->curr_log_zone; - tmp->logical_addr = logical_page_addr; - tmp->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return; - } - page_map *ptr = maps[index]->page_maps; - while (ptr->next) { - if (ptr->next->logical_addr == logical_page_addr) { - //Update log counter - decrease_zone_num_valid_page(ptr->next->page_zone_info, 1); - ptr->next->page_zone_info = info->curr_log_zone; - ptr->next->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return; - } else if (ptr->next->logical_addr > logical_page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); - tmp->next = ptr->next; - ptr->next = tmp; - tmp->page_zone_info = info->curr_log_zone; - tmp->logical_addr = logical_page_addr; - tmp->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return; - } - ptr = ptr->next; - } - ptr->next = (page_map *)calloc(1, sizeof(page_map)); - ptr->next->page_zone_info = info->curr_log_zone; - ptr->next->logical_addr = logical_page_addr; - ptr->next->physical_addr = physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); -} - -static int lookup_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long *physical_addr) -{ - uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); - //Lock the logical block - pthread_mutex_lock(&info->logical_block_maps[index]->logical_block_lock); - //Search in log - for (page_map *head = info->logical_block_maps[index]->page_maps; head; head = head->next) { - if (head->logical_addr == logical_page_addr) { - *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return 0; - } - } - for (page_map *head = info->logical_block_maps[index]->old_page_maps; head; head = head->next) { - if (head->logical_addr == logical_page_addr) { - *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return 0; - } - } - //If not present provide data block addr - uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); - *physical_addr = info->logical_block_maps[index]->block_map->physical_zone_saddr + offset; - pthread_mutex_unlock(&info->logical_block_maps[index]->logical_block_lock); - return 0; -} - -static int append_to_log_zone(zns_info *info, uint64_t logical_addr, - void *buffer, uint32_t size) -{ - uint64_t logical_page_addr = logical_addr / info->zns_page_size; - uint32_t appended_size = 0; - while (appended_size < size) { - unsigned long long physical_addr = 0ULL; - uint32_t num_zone_remain_pages = info->zone_num_pages - - info->curr_log_zone->write_ptr; - uint32_t num_zone_remain_size = num_zone_remain_pages * info->zns_page_size; - if (num_zone_remain_size <= info->zone_append_size_limit && - num_zone_remain_size <= size - appended_size) { - nvme_zns_append(info->fd, info->nsid, - info->curr_log_zone->physical_zone_saddr, num_zone_remain_pages - 1, - 0, 0, 0, 0, num_zone_remain_size, (char *)buffer + appended_size, - 0, NULL, &physical_addr); - if (errno) - return errno; - for (uint32_t i = 0; i < num_zone_remain_pages; ++i, ++logical_page_addr, ++physical_addr) - update_map(info, logical_page_addr, physical_addr); - change_log_zone(info); - appended_size += num_zone_remain_size; - } else if (info->zone_append_size_limit < size - appended_size) { - unsigned short num_pages = info->zone_append_size_limit / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, - num_pages - 1, 0, 0, 0, 0, info->zone_append_size_limit, - (char *)buffer + appended_size, 0, NULL, &physical_addr); - if (errno) - return errno; - for (uint32_t i = 0; i < num_pages; ++i, ++logical_page_addr, ++physical_addr) - update_map(info, logical_page_addr, physical_addr); - appended_size += info->zone_append_size_limit; - } else { - unsigned short num_pages = (size - appended_size) / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->physical_zone_saddr, - num_pages - 1, 0, 0, 0, 0, size - appended_size, - (char *)buffer + appended_size, 0, NULL, &physical_addr); - if (errno) - return errno; - for (uint32_t i = 0; i < num_pages; ++i, ++logical_page_addr, ++physical_addr) - update_map(info, logical_page_addr, physical_addr); - appended_size += size - appended_size; - } - } - // ss_nvme_show_status(errno); - return errno; -} - int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { @@ -566,18 +576,16 @@ int init_ss_zns_device(struct zdev_init_params *params, nvme_zns_identify_ctrl(info->fd, &id); info->zone_append_size_limit = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * (*my_dev)->lba_size_bytes; - // set log zone page mapped hashmap size to num_data_zones - info->logical_block_maps = (logical_block_map **)calloc(info->num_data_zones, - sizeof(logical_block_map *)); // init zones_list_lock pthread_mutex_init(&info->zones_list_lock, NULL); // set all zone index to free_zones_list info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); - pthread_mutex_init(&info->free_zones_list->page_counter_lock, NULL); info->free_zones_list_tail = info->free_zones_list; + pthread_mutex_init(&info->free_zones_list->page_counter_lock, NULL); + pthread_mutex_init(&info->free_zones_list->write_ptr_lock, NULL); for (uint32_t i = 1; i < info->zns_num_zones; ++i) { - info->free_zones_list_tail->chain = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail = info->free_zones_list_tail->chain; + info->free_zones_list_tail->next = (zone_info *)calloc(1, sizeof(zone_info)); + info->free_zones_list_tail = info->free_zones_list_tail->next; info->free_zones_list_tail->physical_zone_saddr = i * info->zone_num_pages; pthread_mutex_init(&info->free_zones_list_tail->page_counter_lock, NULL); pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); @@ -586,35 +594,46 @@ int init_ss_zns_device(struct zdev_init_params *params, info->num_free_zones = info->zns_num_zones; //Set current log zone to 0th zone info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; - if (info->num_free_zones == 1) + info->free_zones_list = info->free_zones_list->next; + if (!info->free_zones_list) info->free_zones_list_tail = NULL; - info->curr_log_zone->chain = NULL; + info->curr_log_zone->next = NULL; info->curr_log_zone->num_valid_pages = 0; --info->num_free_zones; + // set log zone page mapped hashmap size to num_data_zones + info->logical_block_maps = (logical_block_map *)calloc(info->num_data_zones, + sizeof(logical_block_map)); for (uint32_t i = 0; i < info->num_data_zones; ++i) { - info->logical_block_maps[i] = (logical_block_map *)calloc(1, sizeof(logical_block_map)); - info->logical_block_maps[i]->logical_block_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->logical_block_maps[i]->logical_block_lock, NULL); + info->logical_block_maps[i].logical_block_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->logical_block_maps[i].logical_block_lock, NULL); } //Start GC info->run_gc = true; - pthread_create(&info->gc_thread_id, NULL, &gc_thread, (void *)info); + pthread_create(&info->gc_thread_id, NULL, &garbage_collection, (void *)info); return 0; } int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { - unsigned long long physical_addr = 0ULL; zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous - pthread_mutex_lock(&info->zones_list_lock); - int ret = lookup_map(info, address / info->zns_page_size, &physical_addr); - if (ret) - return ret; - read_from_nvme(info, physical_addr, buffer, size); - pthread_mutex_unlock(&info->zones_list_lock); + uint32_t logical_page_addr = address / info->zns_page_size; + uint32_t logical_page_addr_end = (address + size) / info->zns_page_size; + char *curr_read_addr = (char *)buffer; + while (logical_page_addr < logical_page_addr_end) { + unsigned long long physical_addr = 0ULL; + pthread_mutex_lock(&info->zones_list_lock); + int ret = lookup_map(info, logical_page_addr, &physical_addr); + if (ret) { + pthread_mutex_unlock(&info->zones_list_lock); + return ret; + } + read_from_nvme(info, physical_addr, curr_read_addr, info->zns_page_size); + pthread_mutex_unlock(&info->zones_list_lock); + ++logical_page_addr; + curr_read_addr += info->zns_page_size; + } return errno; } @@ -624,7 +643,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; uint32_t index = hash_function(address / info->zns_page_size, info->zone_num_pages); - logical_block_map *map = info->logical_block_maps[index]; + logical_block_map *map = &info->logical_block_maps[index]; pthread_mutex_lock(&map->logical_block_lock); // if can write to data zone directly if (!map->old_page_maps && map->block_map && @@ -636,21 +655,24 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, char *null_buffer = (char *)calloc(null_size, sizeof(char)); int ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, null_buffer, null_size); - if (ret) - return ret; free(null_buffer); - increase_zone_write_ptr(map->block_map, null_size / info->zns_page_size); + if (ret) { + pthread_mutex_unlock(&map->logical_block_lock); + return ret; + } + increase_zone_write_ptr(map->block_map, offset - map->block_map->num_valid_pages); // append data ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, buffer, size); - if (ret) + if (ret) { + pthread_mutex_unlock(&map->logical_block_lock); return ret; + } increase_zone_write_ptr(map->block_map, size / info->zns_page_size); pthread_mutex_unlock(&map->logical_block_lock); } else { pthread_mutex_unlock(&map->logical_block_lock); - int ret = append_to_log_zone(info, address, - buffer, size); + int ret = append_to_log_zone(info, address / info->zns_page_size, buffer, size); if (ret) return ret; } @@ -663,41 +685,42 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) // Kill gc info->run_gc = false; pthread_join(info->gc_thread_id, NULL); - logical_block_map **maps = info->logical_block_maps; + logical_block_map *maps = info->logical_block_maps; // free hashmap for (uint32_t i = 0; i < info->num_data_zones; ++i) { // Clear all log heads for a logical block - while (maps[i]->page_maps) { - page_map *tmp = maps[i]->page_maps; - maps[i]->page_maps = maps[i]->page_maps->next; + while (maps[i].page_maps) { + page_map *tmp = maps[i].page_maps; + maps[i].page_maps = maps[i].page_maps->next; free(tmp); } - if (maps[i]->block_map) { - pthread_mutex_destroy(&maps[i]->block_map->page_counter_lock); - free(maps[i]->block_map); + if (maps[i].block_map) { + pthread_mutex_destroy(&maps[i].block_map->page_counter_lock); + pthread_mutex_destroy(&maps[i].block_map->write_ptr_lock); + free(maps[i].block_map); } - pthread_mutex_destroy(&maps[i]->logical_block_lock); - // Clear maps[i] - free(maps[i]); + pthread_mutex_destroy(&maps[i].logical_block_lock); } free(maps); while (info->used_log_zones_list) { zone_info *tmp = info->used_log_zones_list; - info->used_log_zones_list = info->used_log_zones_list->chain; + info->used_log_zones_list = info->used_log_zones_list->next; pthread_mutex_destroy(&tmp->page_counter_lock); pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } while (info->free_zones_list) { zone_info *tmp = info->free_zones_list; - info->free_zones_list = info->free_zones_list->chain; + info->free_zones_list = info->free_zones_list->next; pthread_mutex_destroy(&tmp->page_counter_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } pthread_mutex_destroy(&info->curr_log_zone->page_counter_lock); + pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); free(info->curr_log_zone); pthread_mutex_destroy(&info->zones_list_lock); - free(my_dev->_private); + free(info); free(my_dev); return 0; } From 70b7be090235fcc83dce5e68394adb7b405809d9 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 2 Oct 2022 12:34:13 +0000 Subject: [PATCH 036/101] revise parameter's type --- src/m23-ftl/zns_device.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 674be9a..1689c96 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -102,14 +102,14 @@ static inline uint32_t hash_function(uint32_t key, uint32_t base); static inline uint32_t offset_function(uint32_t key, uint32_t base); static void change_log_zone(zns_info *info); static int lookup_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long *physical_addr); + uint64_t logical_page_addr, unsigned long long *physical_addr); static void update_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long physical_addr); + uint64_t logical_page_addr, unsigned long long physical_addr); static int read_from_nvme(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size); static int append_to_data_zone(zns_info *info, unsigned long long saddr, void *buffer, uint32_t size); -static int append_to_log_zone(zns_info *info, uint64_t logical_addr, +static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, void *buffer, uint32_t size); static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone); static void *garbage_collection(void *info_ptr); @@ -194,7 +194,7 @@ static void change_log_zone(zns_info *info) } static int lookup_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long *physical_addr) + uint64_t logical_page_addr, unsigned long long *physical_addr) { uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); //Lock the logical block @@ -226,7 +226,7 @@ static int lookup_map(zns_info *info, } static void update_map(zns_info *info, - uint32_t logical_page_addr, unsigned long long physical_addr) + uint64_t logical_page_addr, unsigned long long physical_addr) { uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); logical_block_map *maps = info->logical_block_maps; From c876abb7122db3dc6a329f3ca430892699f3329c Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 2 Oct 2022 18:54:45 +0000 Subject: [PATCH 037/101] read multiple pages --- src/m23-ftl/zns_device.cpp | 522 +++++++++++++++++++------------------ 1 file changed, 275 insertions(+), 247 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 1689c96..a2a8114 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -33,30 +33,30 @@ SOFTWARE. extern "C" { -// Structure for zone in zns +// zone in zns struct zone_info { - unsigned long long physical_zone_saddr; - uint32_t num_valid_pages; // counter + unsigned long long zone_saddr; + uint32_t num_valid_pages; uint32_t write_ptr; - pthread_mutex_t page_counter_lock; + pthread_mutex_t num_valid_pages_lock; pthread_mutex_t write_ptr_lock; zone_info *next; // linked in free_zones and used_log_zones_list }; -// Structure for pagemap in log +// page map for log zones struct page_map { - uint64_t logical_addr; + uint64_t logical_page_addr; unsigned long long physical_addr; zone_info *page_zone_info; page_map *next; // page map for each logical block }; -// Structure for logical block [contains page map and block map] -struct logical_block_map { +// logical block contains data in log zone (page map) and data in data zone (block map) +struct logical_block { uint64_t logical_block_saddr; - page_map *page_maps; // page mapping for this logical block + page_map *page_maps; // page mapping for this logical block (log zone) page_map *old_page_maps; - zone_info *block_map; // Point to zone_info + zone_info *block_map; // block mapping for this logical block (data zone) //TODO: LOCK the access pthread_mutex_t logical_block_lock; }; @@ -64,30 +64,30 @@ struct logical_block_map { struct zns_info { // Values from init parameters int num_log_zones; - int gc_trigger; - pthread_t gc_thread_id; + int gc_wmark; + pthread_t gc_thread; bool run_gc; - // Query the nisd for following info + // Query the nsid for following info int fd; unsigned nsid; uint32_t zns_page_size; uint32_t zns_num_zones; uint32_t zone_num_pages; uint32_t num_data_zones; - uint32_t max_data_transfer_size; - uint32_t zone_append_size_limit; - pthread_mutex_t zones_list_lock; - // Log zone maintainance + uint32_t mdts; // max data transfer size (read limit) + uint32_t zasl; // zone append size limit (append limit) + // Log zones zone_info *curr_log_zone; int num_used_log_zones; zone_info *used_log_zones_list; zone_info *used_log_zones_list_tail; - // Free zones array + // Free zones uint32_t num_free_zones; zone_info *free_zones_list; zone_info *free_zones_list_tail; - // Logical to Physical mapping page and block - logical_block_map *logical_block_maps; // Page mapped hashmap for log zone + pthread_mutex_t zones_list_lock; + // logical block corresponding to each data zone + logical_block *logical_blocks; }; static inline void increase_zone_num_valid_page(zone_info *zone, @@ -98,46 +98,36 @@ static inline void increase_zone_write_ptr(zone_info *zone, uint32_t num_pages); static inline void decrease_zone_write_ptr(zone_info *zone, uint32_t num_pages); -static inline uint32_t hash_function(uint32_t key, uint32_t base); -static inline uint32_t offset_function(uint32_t key, uint32_t base); +static inline uint32_t get_block_index(uint32_t key, uint32_t base); +static inline uint32_t get_data_offset(uint32_t key, uint32_t base); static void change_log_zone(zns_info *info); -static int lookup_map(zns_info *info, - uint64_t logical_page_addr, unsigned long long *physical_addr); -static void update_map(zns_info *info, - uint64_t logical_page_addr, unsigned long long physical_addr); -static int read_from_nvme(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size); +static bool look_up_map(page_map *maps, uint64_t logical_page_addr, + unsigned long long *physical_addr); +static void update_map(zns_info *info, uint64_t logical_page_addr, + unsigned long long physical_addr); +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size); static int append_to_data_zone(zns_info *info, unsigned long long saddr, void *buffer, uint32_t size); static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, - void *buffer, uint32_t size); -static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone); + void *buffer, uint32_t size); +static void merge(zns_info *info, logical_block *block, zone_info *new_zone); static void *garbage_collection(void *info_ptr); -// int count(zone_info *ptr) -// { -// int count = 0; -// while (ptr) { -// ++count; -// ptr = ptr->next; -// } -// return count; -// } - static inline void increase_zone_num_valid_page(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->page_counter_lock); + pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages += num_pages; - pthread_mutex_unlock(&zone->page_counter_lock); + pthread_mutex_unlock(&zone->num_valid_pages_lock); } static inline void decrease_zone_num_valid_page(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->page_counter_lock); + pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages -= num_pages; - pthread_mutex_unlock(&zone->page_counter_lock); + pthread_mutex_unlock(&zone->num_valid_pages_lock); } static inline void increase_zone_write_ptr(zone_info *zone, @@ -156,20 +146,18 @@ static inline void decrease_zone_write_ptr(zone_info *zone, pthread_mutex_unlock(&zone->write_ptr_lock); } -static inline uint32_t hash_function(uint32_t key, uint32_t base) +static inline uint32_t get_block_index(uint32_t key, uint32_t base) { return key / base; } -static inline uint32_t offset_function(uint32_t key, uint32_t base) +static inline uint32_t get_data_offset(uint32_t key, uint32_t base) { return key % base; } static void change_log_zone(zns_info *info) { - // TODO: Add a check on no of log zone used, trigger gc if it reaches the condition - // Check if current log zone is ended, then change to next free log zone; FIXME pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; if (info->used_log_zones_list) info->used_log_zones_list_tail->next = info->curr_log_zone; @@ -193,190 +181,175 @@ static void change_log_zone(zns_info *info) } } -static int lookup_map(zns_info *info, - uint64_t logical_page_addr, unsigned long long *physical_addr) +static bool look_up_map(page_map *maps, uint64_t logical_page_addr, + unsigned long long *physical_addr) { - uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); //Lock the logical block - pthread_mutex_lock(&info->logical_block_maps[index].logical_block_lock); - //Search in log - for (page_map *head = info->logical_block_maps[index].page_maps; head; head = head->next) { - if (head->logical_addr > logical_page_addr) - break; - if (head->logical_addr == logical_page_addr) { - *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); - return 0; - } - } - for (page_map *head = info->logical_block_maps[index].old_page_maps; head; head = head->next) { - if (head->logical_addr > logical_page_addr) - break; - if (head->logical_addr == logical_page_addr) { + //Search in log zone + for (page_map *head = maps; head; head = head->next) { + if (head->logical_page_addr > logical_page_addr) + return false; + if (head->logical_page_addr == logical_page_addr) { *physical_addr = head->physical_addr; - pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); - return 0; + return true; } } - //If not present provide data block addr - uint32_t offset = offset_function(logical_page_addr, info->zone_num_pages); - *physical_addr = info->logical_block_maps[index].block_map->physical_zone_saddr + offset; - pthread_mutex_unlock(&info->logical_block_maps[index].logical_block_lock); - return 0; + return false; } -static void update_map(zns_info *info, - uint64_t logical_page_addr, unsigned long long physical_addr) +static void update_map(zns_info *info, uint64_t logical_page_addr, + unsigned long long physical_addr) { - uint32_t index = hash_function(logical_page_addr, info->zone_num_pages); - logical_block_map *maps = info->logical_block_maps; + uint32_t index = get_block_index(logical_page_addr, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; increase_zone_num_valid_page(info->curr_log_zone, 1); increase_zone_write_ptr(info->curr_log_zone, 1); - //Fill in hashmap - //printf("Added to %d\n",index); - //Lock for the update in log - pthread_mutex_lock(&maps[index].logical_block_lock); - if (!maps[index].page_maps) { - maps[index].page_maps = (page_map *)calloc(1, sizeof(page_map)); - maps[index].page_maps->logical_addr = logical_page_addr; - maps[index].page_maps->physical_addr = physical_addr; - maps[index].page_maps->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + //Lock for updating page map + pthread_mutex_lock(&block->logical_block_lock); + if (!block->page_maps) { + block->page_maps = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps->logical_page_addr = logical_page_addr; + block->page_maps->physical_addr = physical_addr; + block->page_maps->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&block->logical_block_lock); return; } - if (maps[index].page_maps->logical_addr == logical_page_addr) { + if (block->page_maps->logical_page_addr == logical_page_addr) { //Update log counter - decrease_zone_num_valid_page(maps[index].page_maps->page_zone_info, 1); - maps[index].page_maps->physical_addr = physical_addr; - maps[index].page_maps->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + decrease_zone_num_valid_page(block->page_maps->page_zone_info, 1); + block->page_maps->physical_addr = physical_addr; + block->page_maps->page_zone_info = info->curr_log_zone; + pthread_mutex_unlock(&block->logical_block_lock); return; } - if (maps[index].page_maps->logical_addr > logical_page_addr) { + if (block->page_maps->logical_page_addr > logical_page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); - tmp->next = maps[index].page_maps; - maps[index].page_maps = tmp; - tmp->logical_addr = logical_page_addr; + tmp->next = block->page_maps; + block->page_maps = tmp; + tmp->logical_page_addr = logical_page_addr; tmp->physical_addr = physical_addr; tmp->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); return; } - page_map *ptr = maps[index].page_maps; + page_map *ptr = block->page_maps; while (ptr->next) { - if (ptr->next->logical_addr == logical_page_addr) { - //Update log counter + if (ptr->next->logical_page_addr == logical_page_addr) { + //Update log counter decrease_zone_num_valid_page(ptr->next->page_zone_info, 1); ptr->next->physical_addr = physical_addr; ptr->next->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); return; - } else if (ptr->next->logical_addr > logical_page_addr) { + } else if (ptr->next->logical_page_addr > logical_page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = ptr->next; ptr->next = tmp; - tmp->logical_addr = logical_page_addr; + tmp->logical_page_addr = logical_page_addr; tmp->physical_addr = physical_addr; tmp->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); return; } ptr = ptr->next; } ptr->next = (page_map *)calloc(1, sizeof(page_map)); - ptr->next->logical_addr = logical_page_addr; + ptr->next->logical_page_addr = logical_page_addr; ptr->next->physical_addr = physical_addr; ptr->next->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&maps[index].logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); } -static int read_from_nvme(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size) +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size) { - unsigned short num_pages = size / info->zns_page_size - 1; - nvme_read(info->fd, info->nsid, physical_addr, num_pages, 0, 0, 0, 0, 0, - size, buffer, 0, NULL); - // ss_nvme_show_status(errno); + unsigned short num_pages = size / info->zns_page_size; + nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, + 0U, 0U, 0U, 0U, 0U,size, buffer, 0U, NULL); return errno; } static int append_to_data_zone(zns_info *info, unsigned long long saddr, void *buffer, uint32_t size) { - uint32_t appended_size = 0; + uint32_t appended_size = 0U; while (appended_size < size) { unsigned long long physical_addr = 0ULL; - uint32_t curr_append_size = info->zone_append_size_limit; + uint32_t curr_append_size = info->zasl; if (curr_append_size > size - appended_size) curr_append_size = size - appended_size; unsigned short num_curr_append_pages = curr_append_size / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, saddr, num_curr_append_pages - 1, 0, 0, 0, 0, - curr_append_size, (char *)buffer + appended_size, - 0, NULL, &physical_addr); + nvme_zns_append(info->fd, info->nsid, saddr, num_curr_append_pages - 1, + 0U, 0U, 0U, 0U, curr_append_size, + (char *)buffer + appended_size, 0U, NULL, &physical_addr); if (errno) return errno; appended_size += curr_append_size; } - // ss_nvme_show_status(errno); return errno; } static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, - void *buffer, uint32_t size) + void *buffer, uint32_t size) { - uint32_t appended_size = 0; + uint32_t appended_size = 0U; while (appended_size < size) { unsigned long long physical_addr = 0ULL; bool need_to_change_log_zone = true; - uint32_t curr_append_size = (info->zone_num_pages - info->curr_log_zone->write_ptr) * + uint32_t curr_append_size = (info->zone_num_pages - + info->curr_log_zone->write_ptr) * info->zns_page_size; - if (curr_append_size > info->zone_append_size_limit) { - curr_append_size = info->zone_append_size_limit; + if (curr_append_size > info->zasl) { + curr_append_size = info->zasl; need_to_change_log_zone = false; } if (curr_append_size > size - appended_size) { curr_append_size = size - appended_size; need_to_change_log_zone = false; } - unsigned short num_curr_append_pages = curr_append_size / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, - info->curr_log_zone->physical_zone_saddr, num_curr_append_pages - 1, - 0, 0, 0, 0, curr_append_size, (char *)buffer + appended_size, - 0, NULL, &physical_addr); + unsigned short num_curr_append_pages = curr_append_size / + info->zns_page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->zone_saddr, + num_curr_append_pages - 1, 0U, 0U, 0U, 0U, + curr_append_size, (char *)buffer + appended_size, + 0U, NULL, &physical_addr); if (errno) return errno; - for (uint32_t i = 0; i < num_curr_append_pages; ++i, ++logical_page_addr, ++physical_addr) + for (uint32_t i = 0U; i < num_curr_append_pages; + ++i, ++logical_page_addr, ++physical_addr) update_map(info, logical_page_addr, physical_addr); if (need_to_change_log_zone) change_log_zone(info); appended_size += curr_append_size; } - // ss_nvme_show_status(errno); return errno; } -static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) +static void merge(zns_info *info, logical_block *block, zone_info *new_zone) { - pthread_mutex_lock(&map->logical_block_lock); - map->old_page_maps = map->page_maps; - map->page_maps = NULL; - pthread_mutex_unlock(&map->logical_block_lock); - page_map *ptr = map->old_page_maps; - zone_info *old_used_zone = map->block_map; - uint32_t zone_append_page_limit = info->zone_append_size_limit / info->zns_page_size; - char * buffer = (char *)calloc(info->zone_append_size_limit, sizeof(char)); - for (uint32_t offset = 0; offset < info->zone_num_pages; ++offset) { + pthread_mutex_lock(&block->logical_block_lock); + block->old_page_maps = block->page_maps; + block->page_maps = NULL; + pthread_mutex_unlock(&block->logical_block_lock); + page_map *ptr = block->old_page_maps; + zone_info *old_used_data_zone = block->block_map; + uint32_t zone_append_page_limit = info->zasl / info->zns_page_size; + char * buffer = (char *)calloc(info->zasl, sizeof(char)); + for (uint32_t offset = 0U; offset < info->zone_num_pages; ++offset) { unsigned long long page_physical_addr = 0ULL; bool have_data = false; bool still_have_data = false; - if (old_used_zone) { + // if data in data zone + if (old_used_data_zone) { have_data = true; - page_physical_addr = old_used_zone->physical_zone_saddr + offset; - decrease_zone_write_ptr(old_used_zone, 1); - if (old_used_zone->write_ptr) + page_physical_addr = old_used_data_zone->zone_saddr + offset; + decrease_zone_write_ptr(old_used_data_zone, 1); + if (old_used_data_zone->write_ptr) still_have_data = true; } - if (ptr && ptr->logical_addr == map->logical_block_saddr + offset) { + // if data in log zone + if (ptr && + ptr->logical_page_addr == block->logical_block_saddr + offset) { have_data = true; page_physical_addr = ptr->physical_addr; decrease_zone_num_valid_page(ptr->page_zone_info, 1); @@ -385,43 +358,45 @@ static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) still_have_data = true; } if (have_data) - read_from_nvme(info, page_physical_addr, - buffer + (offset % zone_append_page_limit) * info->zns_page_size, - info->zns_page_size); + read_from_zns(info, page_physical_addr, + buffer + (offset % zone_append_page_limit) * info->zns_page_size, + info->zns_page_size); if (!still_have_data) { - append_to_data_zone(info, new_zone->physical_zone_saddr, buffer, + append_to_data_zone(info, new_zone->zone_saddr, buffer, (offset % zone_append_page_limit + 1) * info->zns_page_size); - increase_zone_write_ptr(new_zone, offset % zone_append_page_limit + 1); + increase_zone_write_ptr(new_zone, + offset % zone_append_page_limit + 1); break; } if (offset % zone_append_page_limit == zone_append_page_limit - 1) { - append_to_data_zone(info, new_zone->physical_zone_saddr, - buffer, info->zone_append_size_limit); + append_to_data_zone(info, new_zone->zone_saddr, + buffer, info->zasl); increase_zone_write_ptr(new_zone, zone_append_page_limit); - memset(buffer, 0, info->zone_append_size_limit); + memset(buffer, 0, info->zasl); } } free(buffer); - pthread_mutex_lock(&map->logical_block_lock); - while (map->old_page_maps) { - page_map *tmp = map->old_page_maps; - map->old_page_maps = map->old_page_maps->next; + pthread_mutex_lock(&block->logical_block_lock); + while (block->old_page_maps) { + page_map *tmp = block->old_page_maps; + block->old_page_maps = block->old_page_maps->next; free(tmp); } - map->block_map = new_zone; - pthread_mutex_unlock(&map->logical_block_lock); + block->block_map = new_zone; + pthread_mutex_unlock(&block->logical_block_lock); // Append old data zone to free zones list pthread_mutex_lock(&info->zones_list_lock); - if (old_used_zone) { - decrease_zone_write_ptr(old_used_zone, old_used_zone->write_ptr); + if (old_used_data_zone) { + decrease_zone_write_ptr(old_used_data_zone, + old_used_data_zone->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, - old_used_zone->physical_zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0, NULL); + old_used_data_zone->zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); if (info->free_zones_list) - info->free_zones_list_tail->next = old_used_zone; + info->free_zones_list_tail->next = old_used_data_zone; else - info->free_zones_list = old_used_zone; - info->free_zones_list_tail = old_used_zone; + info->free_zones_list = old_used_data_zone; + info->free_zones_list_tail = old_used_data_zone; ++info->num_free_zones; } pthread_mutex_unlock(&info->zones_list_lock); @@ -430,17 +405,17 @@ static void merge(zns_info *info, logical_block_map *map, zone_info *new_zone) static void *garbage_collection(void *info_ptr) { zns_info *info = (zns_info *)info_ptr; - uint32_t index = 0; + uint32_t index = 0U; while (info->run_gc) { - //Check condition - while (info->num_log_zones - info->num_used_log_zones > info->gc_trigger) { + while (info->num_log_zones - info->num_used_log_zones > + info->gc_wmark) { if (!info->run_gc) return NULL; } - logical_block_map *ptr = &info->logical_block_maps[index]; - while(!ptr->page_maps) { + logical_block *block = &info->logical_blocks[index]; + while(!block->page_maps) { index = (index + 1) % info->num_data_zones; - ptr = &info->logical_block_maps[index]; + block = &info->logical_blocks[index]; if (!info->run_gc) return NULL; } @@ -455,17 +430,17 @@ static void *garbage_collection(void *info_ptr) pthread_mutex_unlock(&info->zones_list_lock); if (!info->run_gc) return NULL; - // Merge the logical block to data zone - merge(info, ptr, free_zone); - // Check used log zone valid counter if zero reset and add to free zone list - // FIXME: Remove zone from used_log_zones_list if valid_page is zero and add that zone to free_zones_list - // Reset if used log zone : if valid pages is reference is zero + // Merge logical block to data zone + merge(info, block, free_zone); if (!info->run_gc) return NULL; + // Check used log zone valid counter if zero reset and add to free zone list + // Remove zone from used_log_zones_list if valid_page is zero and add that zone to free zones list pthread_mutex_lock(&info->zones_list_lock); for (zone_info *prev = NULL, *free = NULL, *tmp = info->used_log_zones_list; info->run_gc && tmp;) { if (!tmp->num_valid_pages) { + // Remove from used_log_zones free = tmp; tmp = tmp->next; if (prev) { @@ -479,9 +454,8 @@ static void *garbage_collection(void *info_ptr) // reset decrease_zone_write_ptr(free, free->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, - free->physical_zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0, NULL); - // Remove from used_log_zones + free->zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); --info->num_used_log_zones; if (info->free_zones_list) info->free_zones_list_tail->next = free; @@ -509,8 +483,8 @@ int init_ss_zns_device(struct zdev_init_params *params, zns_info *info = (zns_info *)(*my_dev)->_private; // set num_log_zones info->num_log_zones = params->log_zones; - // set gc_trigger - info->gc_trigger = params->gc_wmark; + // set gc_wmark + info->gc_wmark = params->gc_wmark; // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -525,14 +499,14 @@ int init_ss_zns_device(struct zdev_init_params *params, } // reset device if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0, true, - NVME_ZNS_ZSA_RESET, 0, NULL); + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); if (ret) { printf("Zone reset failed %d\n", ret); return ret; } } - // set zns_lba_size(or)zns_page_size : Its same for now! + // set zns_lba_size or zns_page_size : Its same for now! nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); if (ret) { @@ -544,9 +518,9 @@ int init_ss_zns_device(struct zdev_init_params *params, info->zns_page_size = (*my_dev)->tparams.zns_lba_size; // set zns_num_zones nvme_zone_report zns_report; - ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, - sizeof(zns_report), &zns_report); + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, + NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, + false, sizeof(zns_report), &zns_report); if (ret) { printf("Failed to report zones, ret %d\n", ret); return ret; @@ -568,26 +542,35 @@ int init_ss_zns_device(struct zdev_init_params *params, // set max_data_transfer_size struct nvme_id_ctrl ctrl; nvme_identify_ctrl(info->fd, &ctrl); - void *regs = mmap(NULL, getpagesize(), PROT_READ,MAP_SHARED, info->fd, 0); - info->max_data_transfer_size = (1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * - (*my_dev)->lba_size_bytes; + void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); + if (errno) { + printf("Failed to mmap\n"); + return errno; + } + info->mdts = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) - 1) * + (*my_dev)->lba_size_bytes; // set zone_append_size_limit struct nvme_zns_id_ctrl id; nvme_zns_identify_ctrl(info->fd, &id); - info->zone_append_size_limit = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * - (*my_dev)->lba_size_bytes; + info->zasl = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * + (*my_dev)->lba_size_bytes; + munmap(regs, getpagesize()); + if (errno) { + printf("Failed to munmap\n"); + return errno; + } // init zones_list_lock pthread_mutex_init(&info->zones_list_lock, NULL); // set all zone index to free_zones_list info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); info->free_zones_list_tail = info->free_zones_list; - pthread_mutex_init(&info->free_zones_list->page_counter_lock, NULL); + pthread_mutex_init(&info->free_zones_list->num_valid_pages_lock, NULL); pthread_mutex_init(&info->free_zones_list->write_ptr_lock, NULL); for (uint32_t i = 1; i < info->zns_num_zones; ++i) { info->free_zones_list_tail->next = (zone_info *)calloc(1, sizeof(zone_info)); info->free_zones_list_tail = info->free_zones_list_tail->next; - info->free_zones_list_tail->physical_zone_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_list_tail->page_counter_lock, NULL); + info->free_zones_list_tail->zone_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_list_tail->num_valid_pages_lock, NULL); pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); } // set num_free_zones @@ -598,18 +581,18 @@ int init_ss_zns_device(struct zdev_init_params *params, if (!info->free_zones_list) info->free_zones_list_tail = NULL; info->curr_log_zone->next = NULL; - info->curr_log_zone->num_valid_pages = 0; + info->curr_log_zone->num_valid_pages = 0U; --info->num_free_zones; // set log zone page mapped hashmap size to num_data_zones - info->logical_block_maps = (logical_block_map *)calloc(info->num_data_zones, - sizeof(logical_block_map)); - for (uint32_t i = 0; i < info->num_data_zones; ++i) { - info->logical_block_maps[i].logical_block_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->logical_block_maps[i].logical_block_lock, NULL); + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].logical_block_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->logical_blocks[i].logical_block_lock, NULL); } //Start GC info->run_gc = true; - pthread_create(&info->gc_thread_id, NULL, &garbage_collection, (void *)info); + pthread_create(&info->gc_thread, NULL, &garbage_collection, (void *)info); return 0; } @@ -619,21 +602,63 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, zns_info *info = (zns_info *)my_dev->_private; //FIXME: Proision for contiguos block read, but not written contiguous uint32_t logical_page_addr = address / info->zns_page_size; - uint32_t logical_page_addr_end = (address + size) / info->zns_page_size; - char *curr_read_addr = (char *)buffer; - while (logical_page_addr < logical_page_addr_end) { + uint32_t index = get_block_index(logical_page_addr, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + pthread_mutex_lock(&block->logical_block_lock); + // read data from data zone + if (block->block_map) { + uint32_t read_size = 0U; + uint32_t offset = get_data_offset(logical_page_addr, + info->zone_num_pages); + while (read_size < size) { + uint32_t curr_read_size = info->mdts; + if (curr_read_size > size - read_size) + curr_read_size = size - read_size; + read_from_zns(info, block->block_map->zone_saddr + offset + read_size, + buffer, curr_read_size); + read_size += curr_read_size; + } + } + // read data from log zone + unsigned long long curr_start_physical_addr = 0ULL; + uint32_t curr_read_offset = 0U; + uint32_t curr_read_size = 0U; + unsigned long long prev_physical_addr = 0ULL; + page_map *curr_page_map = block->page_maps ? block->page_maps : + block->old_page_maps; + for (uint32_t i = 0U; i < size; i += info->zns_page_size, ++logical_page_addr) { unsigned long long physical_addr = 0ULL; - pthread_mutex_lock(&info->zones_list_lock); - int ret = lookup_map(info, logical_page_addr, &physical_addr); - if (ret) { - pthread_mutex_unlock(&info->zones_list_lock); - return ret; + bool get_addr = look_up_map(curr_page_map, + logical_page_addr, &physical_addr); + if (get_addr) { + if (!curr_read_size) { + curr_start_physical_addr = physical_addr; + curr_read_offset = i; + } else if (physical_addr - prev_physical_addr != 1) { // if physical address are not continuous + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, curr_read_size); + curr_start_physical_addr = physical_addr; + curr_read_offset = i; + curr_read_size = 0U; + } + curr_read_size += info->zns_page_size; + if (curr_read_size == info->mdts) { // if current read size is equal to mdts, then read data from zns + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, curr_read_size); + curr_read_size = 0U; + } else { + prev_physical_addr = physical_addr; + } + } else if (curr_read_size) { // if physical address are not continuous + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, curr_read_size); + curr_read_size = 0U; } - read_from_nvme(info, physical_addr, curr_read_addr, info->zns_page_size); - pthread_mutex_unlock(&info->zones_list_lock); - ++logical_page_addr; - curr_read_addr += info->zns_page_size; } + if (curr_read_size) // read the rest of data + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, curr_read_size); + pthread_mutex_unlock(&block->logical_block_lock); return errno; } @@ -641,38 +666,41 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; - uint32_t index = hash_function(address / info->zns_page_size, - info->zone_num_pages); - logical_block_map *map = &info->logical_block_maps[index]; - pthread_mutex_lock(&map->logical_block_lock); + uint32_t index = get_block_index(address / info->zns_page_size, + info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + pthread_mutex_lock(&block->logical_block_lock); // if can write to data zone directly - if (!map->old_page_maps && map->block_map && - map->block_map->write_ptr < info->zone_num_pages) { - uint32_t offset = offset_function(address / info->zns_page_size, + if (!block->old_page_maps && block->block_map && + block->block_map->write_ptr < info->zone_num_pages) { + uint32_t offset = get_data_offset(address / info->zns_page_size, info->zone_num_pages); // append null data until arrive offset - uint32_t null_size = (offset - map->block_map->num_valid_pages) * info->zns_page_size; + uint32_t null_size = (offset - block->block_map->num_valid_pages) * + info->zns_page_size; char *null_buffer = (char *)calloc(null_size, sizeof(char)); - int ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, + int ret = append_to_data_zone(info, block->block_map->zone_saddr, null_buffer, null_size); free(null_buffer); if (ret) { - pthread_mutex_unlock(&map->logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); return ret; } - increase_zone_write_ptr(map->block_map, offset - map->block_map->num_valid_pages); + increase_zone_write_ptr(block->block_map, + offset - block->block_map->num_valid_pages); // append data - ret = append_to_data_zone(info, map->block_map->physical_zone_saddr, + ret = append_to_data_zone(info, block->block_map->zone_saddr, buffer, size); if (ret) { - pthread_mutex_unlock(&map->logical_block_lock); + pthread_mutex_unlock(&block->logical_block_lock); return ret; } - increase_zone_write_ptr(map->block_map, size / info->zns_page_size); - pthread_mutex_unlock(&map->logical_block_lock); + increase_zone_write_ptr(block->block_map, size / info->zns_page_size); + pthread_mutex_unlock(&block->logical_block_lock); } else { - pthread_mutex_unlock(&map->logical_block_lock); - int ret = append_to_log_zone(info, address / info->zns_page_size, buffer, size); + pthread_mutex_unlock(&block->logical_block_lock); + int ret = append_to_log_zone(info, address / info->zns_page_size, + buffer, size); if (ret) return ret; } @@ -684,39 +712,39 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) zns_info *info = (zns_info *)my_dev->_private; // Kill gc info->run_gc = false; - pthread_join(info->gc_thread_id, NULL); - logical_block_map *maps = info->logical_block_maps; + pthread_join(info->gc_thread, NULL); + logical_block *blocks = info->logical_blocks; // free hashmap - for (uint32_t i = 0; i < info->num_data_zones; ++i) { + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { // Clear all log heads for a logical block - while (maps[i].page_maps) { - page_map *tmp = maps[i].page_maps; - maps[i].page_maps = maps[i].page_maps->next; + while (blocks[i].page_maps) { + page_map *tmp = blocks[i].page_maps; + blocks[i].page_maps = blocks[i].page_maps->next; free(tmp); } - if (maps[i].block_map) { - pthread_mutex_destroy(&maps[i].block_map->page_counter_lock); - pthread_mutex_destroy(&maps[i].block_map->write_ptr_lock); - free(maps[i].block_map); + if (blocks[i].block_map) { + pthread_mutex_destroy(&blocks[i].block_map->num_valid_pages_lock); + pthread_mutex_destroy(&blocks[i].block_map->write_ptr_lock); + free(blocks[i].block_map); } - pthread_mutex_destroy(&maps[i].logical_block_lock); + pthread_mutex_destroy(&blocks[i].logical_block_lock); } - free(maps); + free(blocks); while (info->used_log_zones_list) { zone_info *tmp = info->used_log_zones_list; info->used_log_zones_list = info->used_log_zones_list->next; - pthread_mutex_destroy(&tmp->page_counter_lock); + pthread_mutex_destroy(&tmp->num_valid_pages_lock); pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } while (info->free_zones_list) { zone_info *tmp = info->free_zones_list; info->free_zones_list = info->free_zones_list->next; - pthread_mutex_destroy(&tmp->page_counter_lock); + pthread_mutex_destroy(&tmp->num_valid_pages_lock); pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } - pthread_mutex_destroy(&info->curr_log_zone->page_counter_lock); + pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); free(info->curr_log_zone); pthread_mutex_destroy(&info->zones_list_lock); From c24b7fda4c0659e1d2aaa10fe6b96bcdbdd5e7be Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Fri, 7 Oct 2022 21:33:37 +0000 Subject: [PATCH 038/101] Basic struct and class decl --- src/m45-rocksdb/S2FileSystem.h | 85 ++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index a7ab2d0..678e1f0 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -31,8 +31,93 @@ SOFTWARE. #include #include + +#define LOOKUP_MAP_SIZE 1000 namespace ROCKSDB_NAMESPACE { + struct mapEntries { + char *id; + void *ptr; + mapEntries *chain; + }; + + struct MYFS { + mapEntries *LookupMap[LOOKUP_MAP_SIZE]; //Map type to void ptrs; + }; + + struct Inode { + char EntityName[239]; + bool IsDir; + uint64_t FileSize; + uint64_t Indirect_ptr_lbas; + uint64_t Direct_data_lbas[320]; + }; + + struct Indirect_ptr { + uint64_t Direct_data_lbas[511]; + uint64_t Indirect_ptr_lbas; + }; + + struct dir_data { + char EntityName[252]; + uint32_t InodeNum; + }; + + struct Dir { + dir_data Entities[16]; + }; + + class MYFS_File { + private: + int inode; + char *fileName; + public: + IOStatus Read(); + IOStatus Write(); + IOStatus Close(); + }; + + + /* + *Creates read only MYFS_File object + */ + class MYFS_SequentialFile : public FSSequentialFile { + private: + std::string filename; + MYFS_File *fp; + uint64_t buffer_alignment_size; + public: + MYFS_SequentialFile(const std::string& fname, MYFS_File *fp); + virtual ~MYFS_SequentialFile(); + virtual IOStatus Read(size_t n,const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus PositionedRead(uint64_t offset, size_t n, + const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus Skip(uint64_t n) override; + virtual IOStatus InvalidateCache(size_t offset, size_t length) override { + return IOStatus::OK(); + }; + virtual bool use_direct_io() const override { return false; } + virtual size_t GetRequiredBufferAlignment() const override { + return buffer_alignment_size; + } + }; + + class MYFS_RandomAccessFile : public FSRandomAccessFile { + + }; + + class MYFS_WritableFile : public FSWritableFile { + + }; + + class MYFS_Directory : public FSDirectory { + + }; + + + class S2FileSystem : public FileSystem { public: // No copying allowed From 1384d5e8a74de22f20b720f895ab305efa4201dc Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 7 Oct 2022 21:57:03 +0000 Subject: [PATCH 039/101] support data the size of which is above a zone size --- src/m23-ftl/zns_device.cpp | 93 +++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a2a8114..a35396c 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -20,15 +20,15 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include #include #include -#include #include -#include +#include #include #include #include -#include #include "zns_device.h" extern "C" { @@ -547,12 +547,12 @@ int init_ss_zns_device(struct zdev_init_params *params, printf("Failed to mmap\n"); return errno; } - info->mdts = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) - 1) * + info->mdts = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) - 1) * (*my_dev)->lba_size_bytes; // set zone_append_size_limit struct nvme_zns_id_ctrl id; nvme_zns_identify_ctrl(info->fd, &id); - info->zasl = ((1 << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * + info->zasl = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * (*my_dev)->lba_size_bytes; munmap(regs, getpagesize()); if (errno) { @@ -666,43 +666,62 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; - uint32_t index = get_block_index(address / info->zns_page_size, - info->zone_num_pages); - logical_block *block = &info->logical_blocks[index]; - pthread_mutex_lock(&block->logical_block_lock); - // if can write to data zone directly - if (!block->old_page_maps && block->block_map && - block->block_map->write_ptr < info->zone_num_pages) { + while (size > 0) { + uint32_t index = get_block_index(address / info->zns_page_size, + info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; uint32_t offset = get_data_offset(address / info->zns_page_size, info->zone_num_pages); - // append null data until arrive offset - uint32_t null_size = (offset - block->block_map->num_valid_pages) * - info->zns_page_size; - char *null_buffer = (char *)calloc(null_size, sizeof(char)); - int ret = append_to_data_zone(info, block->block_map->zone_saddr, - null_buffer, null_size); - free(null_buffer); - if (ret) { + uint32_t curr_append_size = 0U; + pthread_mutex_lock(&block->logical_block_lock); + // if can write to data zone directly + if (!block->old_page_maps && block->block_map && + block->block_map->write_ptr <= offset) { + if (block->block_map->write_ptr < offset) { + // append null data until arrive offset + uint32_t null_size = (offset - block->block_map->write_ptr) * + info->zns_page_size; + char *null_buffer = (char *)calloc(null_size, sizeof(char)); + int ret = append_to_data_zone(info, block->block_map->zone_saddr, + null_buffer, null_size); + free(null_buffer); + if (ret) { + pthread_mutex_unlock(&block->logical_block_lock); + return ret; + } + increase_zone_write_ptr(block->block_map, + offset - block->block_map->write_ptr); + } + curr_append_size = (info->zone_num_pages - offset) * + info->zns_page_size; + if (curr_append_size > size) + curr_append_size = size; + int ret = append_to_data_zone(info, block->block_map->zone_saddr, + buffer, curr_append_size); + if (ret) { + pthread_mutex_unlock(&block->logical_block_lock); + return ret; + } + increase_zone_write_ptr(block->block_map, + curr_append_size / info->zns_page_size); pthread_mutex_unlock(&block->logical_block_lock); - return ret; - } - increase_zone_write_ptr(block->block_map, - offset - block->block_map->num_valid_pages); - // append data - ret = append_to_data_zone(info, block->block_map->zone_saddr, - buffer, size); - if (ret) { + } else { + curr_append_size = size; + if (!block->old_page_maps && block->block_map) { + uint32_t diff_size = (block->block_map->write_ptr - offset) * + info->zns_page_size; + if (curr_append_size > diff_size) + curr_append_size = diff_size; + } pthread_mutex_unlock(&block->logical_block_lock); - return ret; + int ret = append_to_log_zone(info, address / info->zns_page_size, + buffer, curr_append_size); + if (ret) + return ret; } - increase_zone_write_ptr(block->block_map, size / info->zns_page_size); - pthread_mutex_unlock(&block->logical_block_lock); - } else { - pthread_mutex_unlock(&block->logical_block_lock); - int ret = append_to_log_zone(info, address / info->zns_page_size, - buffer, size); - if (ret) - return ret; + address += curr_append_size; + buffer = (char *)buffer + curr_append_size; + size -= curr_append_size; } return 0; } From a2a9af6912ea17dc6282b85079cf0ac71b4debf7 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sat, 8 Oct 2022 14:19:13 +0000 Subject: [PATCH 040/101] Upgrade to struct def and functions --- src/m45-rocksdb/S2FileSystem.cc | 88 ++++++++++++++++++++++++++++++++- src/m45-rocksdb/S2FileSystem.h | 48 +++++++++++++----- 2 files changed, 123 insertions(+), 13 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 623aa5d..c1d5a81 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -29,6 +29,69 @@ SOFTWARE. #include namespace ROCKSDB_NAMESPACE { + + int Load_From_NVM(uint64_t addr, void *buffer, uint64_t size) { + return 0; + } + + int LookupMap_HashFunction(void *data) { + return *((int*) data) / LOOKUP_MAP_SIZE; + } + + int LookupMap_Lookup(MYFS *FSObj, std::string id, void *ptr) { + + } + + int LookupMap_Insert(MYFS *FSObj, std::string id, void *ptr) { + + } + + int Load_Children(Inode *ptr, std::string entitiyName, std::vector *children, bool loadChildren) { + //Check no of children and load it + uint64_t childrens_count = ptr->FileSize; + + } + + void Get_ParentPath(std::string path, std::string &parent) { + + } + + void Get_EntityName(std::string path, string::string &entityName) { + + } + + int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr) { + //Check if path in lookupMap cache + int isPresent = LookupMap_Lookup(FSObj, path, ptr); + if(!isPresent) + return 0; + + //if not : Get_Path_Inode for parent dir + std::string parent; + Inode *parentInode; + + Get_ParentPath(path, parent); + isPresent = Get_Path_Inode(FSObj, parent, parentInode); + if(isPresent) + return -1; + //Read parent dir and get asked inode number + if(parentInode->FileSize == 0) + return -1; + + //Get children + std::string entityName; + Get_EntityName(path, entityName); + uint32_t index = Load_Children(parentInode, entityName, NULL, false); + //Load the inode; + uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; + ptr = (Inode *) calloc(1, sizeof(Inode)); + isPresent = Load_From_NVM(address, ptr, (uint64_t) INODE_SIZE); + + //Put it in lookup Map + isPresent = LookupMap_Insert(FSObj, path, ptr); + } + + S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) { FileSystem::Default(); std::string sdelimiter = ":"; @@ -53,6 +116,29 @@ namespace ROCKSDB_NAMESPACE { assert(this->_zns_dev->capacity_bytes != 0); ss_dprintf(DBG_FS_1, "device %s is opened and initialized, reported LBA size is %u and capacity %lu \n", device.c_str(), this->_zns_dev->lba_size_bytes, this->_zns_dev->capacity_bytes); + + + //INIT File System + //TODO: In case of persistency; Read following data from Super block + //Init Bitmaps from disk + if (debug) + std::cout<<"Init MYFS"<FileSystemObj; + this->FileSystemObj.FileSystemCapacity = this->_zns_dev->capacity_bytes; + this->FileSystemObj.LogicalBlockSize = this->_zns_dev->lba_size_bytes; + //We reserve a single block as super block and MAX_INODE_COUNT as + this->FileSystemObj.DataBlockCount = (this->FileSystemObj.FileSystemCapacity / this->FileSystemObj.LogicalBlockSize + - (MAX_INODE_COUNT + 1)); + if (debug) + std::cout<<"File System params : "<FileSystemObj.FileSystemCapacity<<" "<< + this->FileSystemObj.LogicalBlockSize<<" "<FileSystemObj.DataBlockCount<FileSystemObj.DataBitMap = (bool*) calloc(this->FileSystemObj.DataBlockCount, sizeof(bool)); + + //Init root inode + //TODO: In case of persistency check if already present in disk + this->FileSystemObj.rootEntry = (Inode *) calloc(1,sizeof(Inode)); } S2FileSystem::~S2FileSystem() { @@ -253,4 +339,4 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } -} \ No newline at end of file +} diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index 678e1f0..484e388 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -33,6 +33,9 @@ SOFTWARE. #define LOOKUP_MAP_SIZE 1000 +#define MAX_INODE_COUNT 255 +#define INODE_SIZE 4096 +#define SUPER_BLOCK_SIZE 4096 namespace ROCKSDB_NAMESPACE { struct mapEntries { @@ -41,10 +44,6 @@ namespace ROCKSDB_NAMESPACE { mapEntries *chain; }; - struct MYFS { - mapEntries *LookupMap[LOOKUP_MAP_SIZE]; //Map type to void ptrs; - }; - struct Inode { char EntityName[239]; bool IsDir; @@ -66,15 +65,40 @@ namespace ROCKSDB_NAMESPACE { struct Dir { dir_data Entities[16]; }; - + + struct MYFS { + mapEntries *LookupCache[LOOKUP_MAP_SIZE]; //Map type to void ptrs; + bool InodeBitMap[MAX_INODE_COUNT]; + bool *DataBitMap; + uint64_t DataBlockCount; + uint64_t FileSystemCapacity; + uint32_t LogicalBlockSize; + Inode *rootEntry; + }; + + + int Load_From_NVM(uint64_t address, void *ptr, uint64_t size); + int Store_To_NVM(); + int Read_User_Data(); + void Get_ParentPath(std::string path, std::string &parent); + void Get_EntityName(std::string path, std::string &entityName); + void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); + int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); + int LookupMap_HashFunction(void *data); + class MYFS_File { private: - int inode; char *fileName; + bool created; + struct Inode *ptr; + void *curr_data_ptr; + MYFS *FSObj; public: - IOStatus Read(); - IOStatus Write(); - IOStatus Close(); + MYFS_File(); + ~MYFS_File(); + int Read(); + int Write(); + int Close(); }; @@ -83,11 +107,10 @@ namespace ROCKSDB_NAMESPACE { */ class MYFS_SequentialFile : public FSSequentialFile { private: - std::string filename; - MYFS_File *fp; + MYFS_File fp; uint64_t buffer_alignment_size; public: - MYFS_SequentialFile(const std::string& fname, MYFS_File *fp); + MYFS_SequentialFile(const std::string& fname, MYFS *FSObj); virtual ~MYFS_SequentialFile(); virtual IOStatus Read(size_t n,const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) override; @@ -216,6 +239,7 @@ namespace ROCKSDB_NAMESPACE { struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; + struct MYFS FileSystemObj; }; } From 483ffb24a6e71e4770e210196b5490ae346068f0 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 9 Oct 2022 00:36:07 +0000 Subject: [PATCH 041/101] fixed gc crash bug --- src/m23-ftl/zns_device.cpp | 655 +++++++++++++++++++------------------ src/m23-ftl/zns_device.h | 1 - 2 files changed, 335 insertions(+), 321 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a35396c..90f8087 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -114,6 +114,317 @@ static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, static void merge(zns_info *info, logical_block *block, zone_info *new_zone); static void *garbage_collection(void *info_ptr); +int init_ss_zns_device(struct zdev_init_params *params, + struct user_zns_device **my_dev) +{ + //Assign the private ptr to zns_info + *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); + (*my_dev)->_private = calloc(1, sizeof(zns_info)); + zns_info *info = (zns_info *)(*my_dev)->_private; + // set num_log_zones + info->num_log_zones = params->log_zones; + // set gc_wmark + info->gc_wmark = params->gc_wmark; + // set fd + info->fd = nvme_open(params->name); + if (info->fd < 0) { + printf("Dev %s opened failed %d\n", params->name, info->fd); + return errno; + } + // set nsid + int ret = nvme_get_nsid(info->fd, &info->nsid); + if (ret) { + printf("Error: failed to retrieve the namespace id %d\n", ret); + return ret; + } + // reset device + if (params->force_reset) { + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return ret; + } + } + // set zns_lba_size or zns_page_size : Its same for now! + nvme_id_ns ns; + ret = nvme_identify_ns(info->fd, info->nsid, &ns); + if (ret) { + printf("Failed to retrieve the nvme identify namespace %d\n", ret); + return ret; + } + (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; + (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; + info->zns_page_size = (*my_dev)->tparams.zns_lba_size; + // set zns_num_zones + nvme_zone_report zns_report; + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, + NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, + false, sizeof(zns_report), &zns_report); + if (ret) { + printf("Failed to report zones, ret %d\n", ret); + return ret; + } + (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); + info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; + // set num_data_zones = zns_num_zones - num_log_zones + info->num_data_zones = info->zns_num_zones - info->num_log_zones; + // set zone_num_pages + nvme_zns_id_ns data; + nvme_zns_identify_ns(info->fd, info->nsid, &data); + info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = #page_per_zone * zone_size + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + (*my_dev)->tparams.zns_lba_size; + // set user capacity bytes = #data_zones * zone_capacity + (*my_dev)->capacity_bytes = (info->num_data_zones) * + (*my_dev)->tparams.zns_zone_capacity; + // set max_data_transfer_size + struct nvme_id_ctrl ctrl; + nvme_identify_ctrl(info->fd, &ctrl); + void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); + if (errno) { + printf("Failed to mmap\n"); + return errno; + } + info->mdts = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * + (*my_dev)->lba_size_bytes; + // set zone_append_size_limit + struct nvme_zns_id_ctrl id; + nvme_zns_identify_ctrl(info->fd, &id); + info->zasl = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) * + (*my_dev)->lba_size_bytes; + munmap(regs, getpagesize()); + if (errno) { + printf("Failed to munmap\n"); + return errno; + } + // init zones_list_lock + pthread_mutex_init(&info->zones_list_lock, NULL); + // set all zone index to free_zones_list + info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); + info->free_zones_list_tail = info->free_zones_list; + pthread_mutex_init(&info->free_zones_list->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones_list->write_ptr_lock, NULL); + for (uint32_t i = 1; i < info->zns_num_zones; ++i) { + info->free_zones_list_tail->next = (zone_info *)calloc(1, sizeof(zone_info)); + info->free_zones_list_tail = info->free_zones_list_tail->next; + info->free_zones_list_tail->zone_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_list_tail->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); + } + // set num_free_zones + info->num_free_zones = info->zns_num_zones; + //Set current log zone to 0th zone + info->curr_log_zone = info->free_zones_list; + info->free_zones_list = info->free_zones_list->next; + if (!info->free_zones_list) + info->free_zones_list_tail = NULL; + info->curr_log_zone->next = NULL; + info->curr_log_zone->num_valid_pages = 0U; + --info->num_free_zones; + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].logical_block_saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->logical_blocks[i].logical_block_lock, NULL); + } + //Start GC + info->run_gc = true; + pthread_create(&info->gc_thread, NULL, &garbage_collection, (void *)info); + return 0; +} + +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + zns_info *info = (zns_info *)my_dev->_private; + uint32_t logical_page_addr = address / info->zns_page_size; + //FIXME: Proision for contiguos block read, but not written contiguous + while (size) { + uint32_t index = get_block_index(logical_page_addr, + info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + uint32_t offset = get_data_offset(logical_page_addr, + info->zone_num_pages); + uint32_t curr_block_read_size = (info->zone_num_pages - offset) * + info->zns_page_size; + if (curr_block_read_size > size) + curr_block_read_size = size; + pthread_mutex_lock(&block->logical_block_lock); + if (block->block_map) { + uint32_t read_size = 0U; + while (read_size < curr_block_read_size) { + uint32_t curr_read_size = info->mdts; + if (curr_read_size > curr_block_read_size - read_size) + curr_read_size = curr_block_read_size - read_size; + read_from_zns(info, + block->block_map->zone_saddr + offset + + read_size / info->zns_page_size, + (char *)buffer + read_size, curr_read_size); + read_size += curr_read_size; + } + } + unsigned long long prev_physical_addr = 0ULL; + unsigned long long curr_start_physical_addr = 0ULL; + uint32_t curr_read_offset = 0U; + uint32_t curr_read_size = 0U; + page_map *curr_page_map = block->page_maps ? block->page_maps : + block->old_page_maps; + for (uint32_t i = 0U; i < curr_block_read_size; + i += info->zns_page_size, ++logical_page_addr) { + unsigned long long physical_addr = 0ULL; + bool get_addr = look_up_map(curr_page_map, + logical_page_addr, &physical_addr); + if (get_addr) { + if (!curr_read_size) { + curr_start_physical_addr = physical_addr; + curr_read_offset = i; + } else if (physical_addr - prev_physical_addr != 1) { // if physical address are not continuous + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, + curr_read_size); + curr_start_physical_addr = physical_addr; + curr_read_offset = i; + curr_read_size = 0U; + } + curr_read_size += info->zns_page_size; + if (curr_read_size == info->mdts) { // if current read size is equal to mdts, then read data from zns + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, + curr_read_size); + curr_read_size = 0U; + } else { + prev_physical_addr = physical_addr; + } + } else if (curr_read_size) { // if physical address are not continuous + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, + curr_read_size); + curr_read_size = 0U; + } + } + if (curr_read_size) // read the rest of data + read_from_zns(info, curr_start_physical_addr, + (char *)buffer + curr_read_offset, curr_read_size); + pthread_mutex_unlock(&block->logical_block_lock); + buffer = (char *)buffer + curr_block_read_size; + size -= curr_block_read_size; + } + return errno; +} + +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + zns_info *info = (zns_info *)my_dev->_private; + while (size) { + uint32_t index = get_block_index(address / info->zns_page_size, + info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + uint32_t offset = get_data_offset(address / info->zns_page_size, + info->zone_num_pages); + uint32_t curr_append_size = 0U; + pthread_mutex_lock(&block->logical_block_lock); + // if can write to data zone directly + if (!block->old_page_maps && block->block_map && + block->block_map->write_ptr <= offset) { + if (block->block_map->write_ptr < offset) { + // append null data until arrive offset + uint32_t null_size = (offset - block->block_map->write_ptr) * + info->zns_page_size; + char *null_buffer = (char *)calloc(null_size, sizeof(char)); + int ret = append_to_data_zone(info, block->block_map->zone_saddr, + null_buffer, null_size); + free(null_buffer); + if (ret) { + pthread_mutex_unlock(&block->logical_block_lock); + return ret; + } + increase_zone_write_ptr(block->block_map, + offset - block->block_map->write_ptr); + } + curr_append_size = (info->zone_num_pages - offset) * + info->zns_page_size; + if (curr_append_size > size) + curr_append_size = size; + int ret = append_to_data_zone(info, block->block_map->zone_saddr, + buffer, curr_append_size); + if (ret) { + pthread_mutex_unlock(&block->logical_block_lock); + return ret; + } + increase_zone_write_ptr(block->block_map, + curr_append_size / info->zns_page_size); + pthread_mutex_unlock(&block->logical_block_lock); + } else { + curr_append_size = size; + if (!block->old_page_maps && block->block_map) { + uint32_t diff_size = (block->block_map->write_ptr - offset) * + info->zns_page_size; + if (curr_append_size > diff_size) + curr_append_size = diff_size; + } + pthread_mutex_unlock(&block->logical_block_lock); + int ret = append_to_log_zone(info, address / info->zns_page_size, + buffer, curr_append_size); + if (ret) + return ret; + } + address += curr_append_size; + buffer = (char *)buffer + curr_append_size; + size -= curr_append_size; + } + return 0; +} + +int deinit_ss_zns_device(struct user_zns_device *my_dev) +{ + zns_info *info = (zns_info *)my_dev->_private; + // Kill gc + info->run_gc = false; + pthread_join(info->gc_thread, NULL); + logical_block *blocks = info->logical_blocks; + // free hashmap + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + // Clear all log heads for a logical block + while (blocks[i].page_maps) { + page_map *tmp = blocks[i].page_maps; + blocks[i].page_maps = blocks[i].page_maps->next; + free(tmp); + } + if (blocks[i].block_map) { + pthread_mutex_destroy(&blocks[i].block_map->num_valid_pages_lock); + pthread_mutex_destroy(&blocks[i].block_map->write_ptr_lock); + free(blocks[i].block_map); + } + pthread_mutex_destroy(&blocks[i].logical_block_lock); + } + free(blocks); + while (info->used_log_zones_list) { + zone_info *tmp = info->used_log_zones_list; + info->used_log_zones_list = info->used_log_zones_list->next; + pthread_mutex_destroy(&tmp->num_valid_pages_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); + free(tmp); + } + while (info->free_zones_list) { + zone_info *tmp = info->free_zones_list; + info->free_zones_list = info->free_zones_list->next; + pthread_mutex_destroy(&tmp->num_valid_pages_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); + free(tmp); + } + pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); + pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); + free(info->curr_log_zone); + pthread_mutex_destroy(&info->zones_list_lock); + free(info); + free(my_dev); + return 0; +} + static inline void increase_zone_num_valid_page(zone_info *zone, uint32_t num_pages) { @@ -164,8 +475,8 @@ static void change_log_zone(zns_info *info) else info->used_log_zones_list = info->curr_log_zone; info->used_log_zones_list_tail = info->curr_log_zone; - ++info->num_used_log_zones; info->curr_log_zone = NULL; + ++info->num_used_log_zones; pthread_mutex_unlock(&info->zones_list_lock); while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; @@ -265,7 +576,7 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, { unsigned short num_pages = size / info->zns_page_size; nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, - 0U, 0U, 0U, 0U, 0U,size, buffer, 0U, NULL); + 0U, 0U, 0U, 0U, 0U, size, buffer, 0U, NULL); return errno; } @@ -297,7 +608,7 @@ static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, unsigned long long physical_addr = 0ULL; bool need_to_change_log_zone = true; uint32_t curr_append_size = (info->zone_num_pages - - info->curr_log_zone->write_ptr) * + info->curr_log_zone->write_ptr) * info->zns_page_size; if (curr_append_size > info->zasl) { curr_append_size = info->zasl; @@ -385,21 +696,21 @@ static void merge(zns_info *info, logical_block *block, zone_info *new_zone) block->block_map = new_zone; pthread_mutex_unlock(&block->logical_block_lock); // Append old data zone to free zones list - pthread_mutex_lock(&info->zones_list_lock); if (old_used_data_zone) { decrease_zone_write_ptr(old_used_data_zone, old_used_data_zone->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, old_used_data_zone->zone_saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_list_lock); if (info->free_zones_list) info->free_zones_list_tail->next = old_used_data_zone; else info->free_zones_list = old_used_data_zone; info->free_zones_list_tail = old_used_data_zone; ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_list_lock); } - pthread_mutex_unlock(&info->zones_list_lock); } static void *garbage_collection(void *info_ptr) @@ -436,26 +747,28 @@ static void *garbage_collection(void *info_ptr) return NULL; // Check used log zone valid counter if zero reset and add to free zone list // Remove zone from used_log_zones_list if valid_page is zero and add that zone to free zones list - pthread_mutex_lock(&info->zones_list_lock); for (zone_info *prev = NULL, *free = NULL, - *tmp = info->used_log_zones_list; info->run_gc && tmp;) { - if (!tmp->num_valid_pages) { + *curr = info->used_log_zones_list; info->run_gc && curr; ) { + if (!curr->num_valid_pages) { + // reset + decrease_zone_write_ptr(curr, curr->write_ptr); + nvme_zns_mgmt_send(info->fd, info->nsid, + curr->zone_saddr, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_list_lock); // Remove from used_log_zones - free = tmp; - tmp = tmp->next; + free = curr; + curr = curr->next; if (prev) { - prev->next = tmp; + prev->next = curr; + if (free == info->used_log_zones_list_tail) + info->used_log_zones_list_tail = prev; } else { - info->used_log_zones_list = tmp; - if (!tmp) - info->used_log_zones_list_tail = tmp; + info->used_log_zones_list = curr; + if (!info->used_log_zones_list) + info->used_log_zones_list_tail = NULL; } free->next = NULL; - // reset - decrease_zone_write_ptr(free, free->write_ptr); - nvme_zns_mgmt_send(info->fd, info->nsid, - free->zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0U, NULL); --info->num_used_log_zones; if (info->free_zones_list) info->free_zones_list_tail->next = free; @@ -463,313 +776,15 @@ static void *garbage_collection(void *info_ptr) info->free_zones_list = free; info->free_zones_list_tail = free; ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_list_lock); } else { - prev = tmp; - tmp = tmp->next; + prev = curr; + curr = curr->next; } } - pthread_mutex_unlock(&info->zones_list_lock); index = (index + 1) % info->num_data_zones; } return NULL; } -int init_ss_zns_device(struct zdev_init_params *params, - struct user_zns_device **my_dev) -{ - //Assign the private ptr to zns_info - *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); - (*my_dev)->_private = calloc(1, sizeof(zns_info)); - zns_info *info = (zns_info *)(*my_dev)->_private; - // set num_log_zones - info->num_log_zones = params->log_zones; - // set gc_wmark - info->gc_wmark = params->gc_wmark; - // set fd - info->fd = nvme_open(params->name); - if (info->fd < 0) { - printf("Dev %s opened failed %d\n", params->name, info->fd); - return errno; - } - // set nsid - int ret = nvme_get_nsid(info->fd, &info->nsid); - if (ret) { - printf("Error: failed to retrieve the namespace id %d\n", ret); - return ret; - } - // reset device - if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, - NVME_ZNS_ZSA_RESET, 0U, NULL); - if (ret) { - printf("Zone reset failed %d\n", ret); - return ret; - } - } - // set zns_lba_size or zns_page_size : Its same for now! - nvme_id_ns ns; - ret = nvme_identify_ns(info->fd, info->nsid, &ns); - if (ret) { - printf("Failed to retrieve the nvme identify namespace %d\n", ret); - return ret; - } - (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; - (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; - info->zns_page_size = (*my_dev)->tparams.zns_lba_size; - // set zns_num_zones - nvme_zone_report zns_report; - ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - false, sizeof(zns_report), &zns_report); - if (ret) { - printf("Failed to report zones, ret %d\n", ret); - return ret; - } - (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; - // set num_data_zones = zns_num_zones - num_log_zones - info->num_data_zones = info->zns_num_zones - info->num_log_zones; - // set zone_num_pages - nvme_zns_id_ns data; - nvme_zns_identify_ns(info->fd, info->nsid, &data); - info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; - // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * - (*my_dev)->tparams.zns_lba_size; - // set user capacity bytes = #data_zones * zone_capacity - (*my_dev)->capacity_bytes = (info->num_data_zones) * - (*my_dev)->tparams.zns_zone_capacity; - // set max_data_transfer_size - struct nvme_id_ctrl ctrl; - nvme_identify_ctrl(info->fd, &ctrl); - void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); - if (errno) { - printf("Failed to mmap\n"); - return errno; - } - info->mdts = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) - 1) * - (*my_dev)->lba_size_bytes; - // set zone_append_size_limit - struct nvme_zns_id_ctrl id; - nvme_zns_identify_ctrl(info->fd, &id); - info->zasl = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) - 1) * - (*my_dev)->lba_size_bytes; - munmap(regs, getpagesize()); - if (errno) { - printf("Failed to munmap\n"); - return errno; - } - // init zones_list_lock - pthread_mutex_init(&info->zones_list_lock, NULL); - // set all zone index to free_zones_list - info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail = info->free_zones_list; - pthread_mutex_init(&info->free_zones_list->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_list->write_ptr_lock, NULL); - for (uint32_t i = 1; i < info->zns_num_zones; ++i) { - info->free_zones_list_tail->next = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail = info->free_zones_list_tail->next; - info->free_zones_list_tail->zone_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_list_tail->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); - } - // set num_free_zones - info->num_free_zones = info->zns_num_zones; - //Set current log zone to 0th zone - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; - if (!info->free_zones_list) - info->free_zones_list_tail = NULL; - info->curr_log_zone->next = NULL; - info->curr_log_zone->num_valid_pages = 0U; - --info->num_free_zones; - // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); - for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - info->logical_blocks[i].logical_block_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->logical_blocks[i].logical_block_lock, NULL); - } - //Start GC - info->run_gc = true; - pthread_create(&info->gc_thread, NULL, &garbage_collection, (void *)info); - return 0; -} - -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, - void *buffer, uint32_t size) -{ - zns_info *info = (zns_info *)my_dev->_private; - //FIXME: Proision for contiguos block read, but not written contiguous - uint32_t logical_page_addr = address / info->zns_page_size; - uint32_t index = get_block_index(logical_page_addr, info->zone_num_pages); - logical_block *block = &info->logical_blocks[index]; - pthread_mutex_lock(&block->logical_block_lock); - // read data from data zone - if (block->block_map) { - uint32_t read_size = 0U; - uint32_t offset = get_data_offset(logical_page_addr, - info->zone_num_pages); - while (read_size < size) { - uint32_t curr_read_size = info->mdts; - if (curr_read_size > size - read_size) - curr_read_size = size - read_size; - read_from_zns(info, block->block_map->zone_saddr + offset + read_size, - buffer, curr_read_size); - read_size += curr_read_size; - } - } - // read data from log zone - unsigned long long curr_start_physical_addr = 0ULL; - uint32_t curr_read_offset = 0U; - uint32_t curr_read_size = 0U; - unsigned long long prev_physical_addr = 0ULL; - page_map *curr_page_map = block->page_maps ? block->page_maps : - block->old_page_maps; - for (uint32_t i = 0U; i < size; i += info->zns_page_size, ++logical_page_addr) { - unsigned long long physical_addr = 0ULL; - bool get_addr = look_up_map(curr_page_map, - logical_page_addr, &physical_addr); - if (get_addr) { - if (!curr_read_size) { - curr_start_physical_addr = physical_addr; - curr_read_offset = i; - } else if (physical_addr - prev_physical_addr != 1) { // if physical address are not continuous - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, curr_read_size); - curr_start_physical_addr = physical_addr; - curr_read_offset = i; - curr_read_size = 0U; - } - curr_read_size += info->zns_page_size; - if (curr_read_size == info->mdts) { // if current read size is equal to mdts, then read data from zns - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, curr_read_size); - curr_read_size = 0U; - } else { - prev_physical_addr = physical_addr; - } - } else if (curr_read_size) { // if physical address are not continuous - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, curr_read_size); - curr_read_size = 0U; - } - } - if (curr_read_size) // read the rest of data - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, curr_read_size); - pthread_mutex_unlock(&block->logical_block_lock); - return errno; -} - -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, - void *buffer, uint32_t size) -{ - zns_info *info = (zns_info *)my_dev->_private; - while (size > 0) { - uint32_t index = get_block_index(address / info->zns_page_size, - info->zone_num_pages); - logical_block *block = &info->logical_blocks[index]; - uint32_t offset = get_data_offset(address / info->zns_page_size, - info->zone_num_pages); - uint32_t curr_append_size = 0U; - pthread_mutex_lock(&block->logical_block_lock); - // if can write to data zone directly - if (!block->old_page_maps && block->block_map && - block->block_map->write_ptr <= offset) { - if (block->block_map->write_ptr < offset) { - // append null data until arrive offset - uint32_t null_size = (offset - block->block_map->write_ptr) * - info->zns_page_size; - char *null_buffer = (char *)calloc(null_size, sizeof(char)); - int ret = append_to_data_zone(info, block->block_map->zone_saddr, - null_buffer, null_size); - free(null_buffer); - if (ret) { - pthread_mutex_unlock(&block->logical_block_lock); - return ret; - } - increase_zone_write_ptr(block->block_map, - offset - block->block_map->write_ptr); - } - curr_append_size = (info->zone_num_pages - offset) * - info->zns_page_size; - if (curr_append_size > size) - curr_append_size = size; - int ret = append_to_data_zone(info, block->block_map->zone_saddr, - buffer, curr_append_size); - if (ret) { - pthread_mutex_unlock(&block->logical_block_lock); - return ret; - } - increase_zone_write_ptr(block->block_map, - curr_append_size / info->zns_page_size); - pthread_mutex_unlock(&block->logical_block_lock); - } else { - curr_append_size = size; - if (!block->old_page_maps && block->block_map) { - uint32_t diff_size = (block->block_map->write_ptr - offset) * - info->zns_page_size; - if (curr_append_size > diff_size) - curr_append_size = diff_size; - } - pthread_mutex_unlock(&block->logical_block_lock); - int ret = append_to_log_zone(info, address / info->zns_page_size, - buffer, curr_append_size); - if (ret) - return ret; - } - address += curr_append_size; - buffer = (char *)buffer + curr_append_size; - size -= curr_append_size; - } - return 0; -} - -int deinit_ss_zns_device(struct user_zns_device *my_dev) -{ - zns_info *info = (zns_info *)my_dev->_private; - // Kill gc - info->run_gc = false; - pthread_join(info->gc_thread, NULL); - logical_block *blocks = info->logical_blocks; - // free hashmap - for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - // Clear all log heads for a logical block - while (blocks[i].page_maps) { - page_map *tmp = blocks[i].page_maps; - blocks[i].page_maps = blocks[i].page_maps->next; - free(tmp); - } - if (blocks[i].block_map) { - pthread_mutex_destroy(&blocks[i].block_map->num_valid_pages_lock); - pthread_mutex_destroy(&blocks[i].block_map->write_ptr_lock); - free(blocks[i].block_map); - } - pthread_mutex_destroy(&blocks[i].logical_block_lock); - } - free(blocks); - while (info->used_log_zones_list) { - zone_info *tmp = info->used_log_zones_list; - info->used_log_zones_list = info->used_log_zones_list->next; - pthread_mutex_destroy(&tmp->num_valid_pages_lock); - pthread_mutex_destroy(&tmp->write_ptr_lock); - free(tmp); - } - while (info->free_zones_list) { - zone_info *tmp = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; - pthread_mutex_destroy(&tmp->num_valid_pages_lock); - pthread_mutex_destroy(&tmp->write_ptr_lock); - free(tmp); - } - pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); - pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); - free(info->curr_log_zone); - pthread_mutex_destroy(&info->zones_list_lock); - free(info); - free(my_dev); - return 0; -} - } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 69c9f10..a757cd2 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -24,7 +24,6 @@ SOFTWARE. #define STOSYS_PROJECT_ZNS_DEVICE_H #include -#include extern "C" { From 1454397a2e878bf3b797f09d149230729257003b Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 9 Oct 2022 11:23:27 +0000 Subject: [PATCH 042/101] fix bug --- src/m45-rocksdb/S2FileSystem.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index c1d5a81..358d961 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -56,7 +56,7 @@ namespace ROCKSDB_NAMESPACE { } - void Get_EntityName(std::string path, string::string &entityName) { + void Get_EntityName(std::string path, std::string &entityName) { } From c5fd256e58c4828b91c5150a38da4826c460b4ae Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 9 Oct 2022 13:05:56 +0000 Subject: [PATCH 043/101] Structure and function update --- src/m45-rocksdb/S2FileSystem.cc | 191 ++++++++++++++++++++++++++------ src/m45-rocksdb/S2FileSystem.h | 4 +- 2 files changed, 162 insertions(+), 33 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index c1d5a81..aa34c3f 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -29,39 +29,75 @@ SOFTWARE. #include namespace ROCKSDB_NAMESPACE { - - int Load_From_NVM(uint64_t addr, void *buffer, uint64_t size) { - return 0; - } - int LookupMap_HashFunction(void *data) { - return *((int*) data) / LOOKUP_MAP_SIZE; + return *((int*) data) / LOOKUP_MAP_SIZE; } int LookupMap_Lookup(MYFS *FSObj, std::string id, void *ptr) { - + } int LookupMap_Insert(MYFS *FSObj, std::string id, void *ptr) { - + } - int Load_Children(Inode *ptr, std::string entitiyName, std::vector *children, bool loadChildren) { - //Check no of children and load it - uint64_t childrens_count = ptr->FileSize; + int Load_From_NVM(uint64_t addr, void *buffer, uint64_t size) { + return 0; + } + int Store_To_NVM(uint64_t addr, void *buffer, uint64_t size) { + return 0; } - void Get_ParentPath(std::string path, std::string &parent) { + uint32_t get_FreeInode(MYFS *FSObj) { } - void Get_EntityName(std::string path, string::string &entityName) { + uint64_t get_FreeDataBlock(MYFS *FSObj) { } + //Trim till /../path in /../path/name + void Get_ParentPath(std::string path, std::string &parent) { + int index; + for(int i=path.size()-1; i>=0; i--) { + if (path[i]=='/') { + index = i; + break; + } + } + parent = path.substr(0,index); + } + + //Trim /../path/name to name + void Get_EntityName(std::string path, std::string &entityName) { + int index; + for(int i=path.size()-1; i>=0; i--) { + if (path[i]=='/') { + index = i; + break; + } + } + parent = path.substr(index+1,path.size()); + } + + //Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool + //return value will be 0 if asked child is not present + uint32_t Load_Children(Inode *ptr, std::string entitiyName, std::vector *children, bool loadChildren) { + //Check no of children and load it + uint64_t childrens_count = ptr->FileSize; + + } + + //A recursive call to load inode of the given path to lookupmap + //Stores the inode ptr as well, returns 0 in success int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr) { - //Check if path in lookupMap cache + if (path=="/tmp") { + ptr = FSObj->rootEntry; + return 0; + } + + //Check if path in lookupMap cache int isPresent = LookupMap_Lookup(FSObj, path, ptr); if(!isPresent) return 0; @@ -78,17 +114,78 @@ namespace ROCKSDB_NAMESPACE { if(parentInode->FileSize == 0) return -1; - //Get children + //Get Entity to search for std::string entityName; Get_EntityName(path, entityName); uint32_t index = Load_Children(parentInode, entityName, NULL, false); - //Load the inode; + if (index) + return -1; + + //Load the children index inode from disk and store in lookupMap; uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; ptr = (Inode *) calloc(1, sizeof(Inode)); isPresent = Load_From_NVM(address, ptr, (uint64_t) INODE_SIZE); + if (isPresent) + return -1; //Put it in lookup Map - isPresent = LookupMap_Insert(FSObj, path, ptr); + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + + int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode) { + Inode *ptr; + int isPresent = Get_Path_Inode(FSObj, Ppath, ptr); + ptr->FileSize += 1; + //FIXME : Get the dir update logic here + return 0; + } + + + int MYFS_CreateFile(MYFS *FSObj, std::string path) { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *) calloc(1, sizeof(Inode)); + //Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName,entityName.c_str()); + + //Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + //Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + int MYFS_CreateDir(MYFS *FSObj, std::string path) { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *) calloc(1, sizeof(Inode)); + + //Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName,entityName.c_str()); + ptr->IsDir = true; + + //Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + //Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; } @@ -123,22 +220,25 @@ namespace ROCKSDB_NAMESPACE { //Init Bitmaps from disk if (debug) std::cout<<"Init MYFS"<FileSystemObj; - this->FileSystemObj.FileSystemCapacity = this->_zns_dev->capacity_bytes; - this->FileSystemObj.LogicalBlockSize = this->_zns_dev->lba_size_bytes; + this->FileSystemObj = (MYFS *) calloc(1, sizeof(MYFS)); + this->FileSystemObj->FileSystemCapacity = this->_zns_dev->capacity_bytes; + this->FileSystemObj->LogicalBlockSize = this->_zns_dev->lba_size_bytes; //We reserve a single block as super block and MAX_INODE_COUNT as - this->FileSystemObj.DataBlockCount = (this->FileSystemObj.FileSystemCapacity / this->FileSystemObj.LogicalBlockSize + this->FileSystemObj->DataBlockCount = (this->FileSystemObj->FileSystemCapacity / this->FileSystemObj->LogicalBlockSize - (MAX_INODE_COUNT + 1)); if (debug) - std::cout<<"File System params : "<FileSystemObj.FileSystemCapacity<<" "<< - this->FileSystemObj.LogicalBlockSize<<" "<FileSystemObj.DataBlockCount<FileSystemObj->FileSystemCapacity<<" "<< + this->FileSystemObj->LogicalBlockSize<<" "<FileSystemObj->DataBlockCount<FileSystemObj.DataBitMap = (bool*) calloc(this->FileSystemObj.DataBlockCount, sizeof(bool)); + this->FileSystemObj->DataBitMap = (bool*) calloc(this->FileSystemObj->DataBlockCount, sizeof(bool)); //Init root inode //TODO: In case of persistency check if already present in disk - this->FileSystemObj.rootEntry = (Inode *) calloc(1,sizeof(Inode)); + //FIXME: Get root dir name dynamically + strcpy(this->FileSystemObj->rootEntry->EntityName,"tmp"); + this->FileSystemObj->rootEntry->IsDir = true; + this->FileSystemObj->rootEntry->FileSize = 0; } S2FileSystem::~S2FileSystem() { @@ -224,18 +324,37 @@ namespace ROCKSDB_NAMESPACE { // Create the specified directory. Returns error if directory exists. IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, dirname, ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); + else + return IOStatus::IOError(__FUNCTION__); + + return IOStatus::OK(); } // Creates directory if missing. Return Ok if it exists, or successful in // Creating. IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, dirname, ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + else + *file_size = ptr->FileSize; + return IOStatus::OK(); } IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { @@ -249,7 +368,8 @@ namespace ROCKSDB_NAMESPACE { IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + *output_path = db_path; + return IOStatus::OK(); } IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { @@ -322,7 +442,12 @@ namespace ROCKSDB_NAMESPACE { // IOError if an IO Error was encountered IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int err = Get_Path_Inode(this->FileSystemObj, dir, ptr); + if (err) + return IOStatus::IOError(__FUNCTION__); + Load_Children(ptr, "", result, true); + return IOStatus::OK(); } // Returns OK if the named file exists. @@ -331,7 +456,11 @@ namespace ROCKSDB_NAMESPACE { // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index 484e388..d68eac0 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -82,7 +82,7 @@ namespace ROCKSDB_NAMESPACE { int Read_User_Data(); void Get_ParentPath(std::string path, std::string &parent); void Get_EntityName(std::string path, std::string &entityName); - void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); + void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); int LookupMap_HashFunction(void *data); @@ -239,7 +239,7 @@ namespace ROCKSDB_NAMESPACE { struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; - struct MYFS FileSystemObj; + struct MYFS *FileSystemObj; }; } From 0e4527e8b0f47dc911088126db5ec541fdb5333f Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 9 Oct 2022 19:01:21 +0000 Subject: [PATCH 044/101] Updated struct and function def --- src/m45-rocksdb/DummyFSForward.cc | 65 ++++++++++++++++++++++++------- src/m45-rocksdb/DummyFSForward.h | 45 ++++++++++++++++++++- 2 files changed, 96 insertions(+), 14 deletions(-) diff --git a/src/m45-rocksdb/DummyFSForward.cc b/src/m45-rocksdb/DummyFSForward.cc index a858e3c..0346ca9 100644 --- a/src/m45-rocksdb/DummyFSForward.cc +++ b/src/m45-rocksdb/DummyFSForward.cc @@ -42,6 +42,7 @@ namespace ROCKSDB_NAMESPACE { this->_name = this->_name.append(this->_private_fs->Name()); this->_ss.str(""); this->_ss.clear(); + } const char *DummyFSForward::Name() const { @@ -52,10 +53,23 @@ namespace ROCKSDB_NAMESPACE { this->_ss.str(""); this->_ss.clear(); this->_ss << " call_seq: " << this->_seq_id++ << " tid: " << std::hash{}(std::this_thread::get_id()) << " "; - return this->_ss.str(); + return this->_ss.str(); } +/* + MYFS_SequentialFile::MYFS_SequentialFile(const std::string& fname, int fd){} + MYFS_SequentialFile::~MYFS_SequentialFile(){} + + IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg){std::cout<<"MYSEQ Read"< *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewSequentialFile(fname, file_opts, result, dbg); + result->reset(); + //int fid; //open(""); + //result->reset(new MYFS_SequentialFile(fname, 100)); + //IOStatus stat; + //return stat; + std::cout << "New seq file : "<_private_fs->NewSequentialFile(fname, file_opts, result, dbg); } // Create a brand new random access read-only file with the @@ -82,7 +102,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewRandomAccessFile(fname, file_opts, result, dbg); + std::cout << "Random access file : "<_private_fs->NewRandomAccessFile(fname, file_opts, result, dbg); } // Create an object that writes to a new file with the specified @@ -97,7 +118,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewWritableFile(fname, file_opts, result, dbg); + std::cout << "Writable file : "<_private_fs->NewWritableFile(fname, file_opts, result, dbg); } // Create an object that writes to a new file with the specified @@ -159,7 +181,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewDirectory(name, io_opts, result, dbg); + std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXX : "<_private_fs->NewDirectory(name, io_opts, result, dbg); } // Returns OK if the named file exists. @@ -171,7 +194,10 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->FileExists(fname, options, dbg); + std::cout << "Check if file exist : " << fname; + IOStatus stat = this->_private_fs->FileExists(fname, options, dbg); + std::cout << std::endl; + return stat; } // Store in *result the names of the children of the specified directory. @@ -234,7 +260,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->CreateDirIfMissing(dirname, options, dbg); + std::cout << "Create dir path : "<_private_fs->CreateDirIfMissing(dirname, options, dbg); } // Delete the specified directory. @@ -249,7 +276,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->GetFileSize(fname, options, file_size, dbg); + std::cout << "File size : "<< fname << std::endl; + return this->_private_fs->GetFileSize(fname, options, file_size, dbg); } // Store the last modification time of fname in *file_mtime. @@ -266,7 +294,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->RenameFile(src, target, options, dbg); + std::cout << "Rename file : "<_private_fs->RenameFile(src, target, options, dbg); } // Hard Link file src to target. @@ -310,7 +339,10 @@ namespace ROCKSDB_NAMESPACE { IOStatus DummyFSForward::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->LockFile(fname, options, lock, dbg); + std::cout << "Lock the file : "<_private_fs->LockFile(fname, options, lock, dbg); + IOStatus stat; + return stat; } // Release the lock acquired by a previous successful call to LockFile. @@ -319,7 +351,10 @@ namespace ROCKSDB_NAMESPACE { IOStatus DummyFSForward::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->UnlockFile(lock, options, dbg); + //std::cout << "unlock the file : "<_private_fs->UnlockFile(lock, options, dbg); + IOStatus stat; + return stat; } // *path is set to a temporary directory that can be used for testing. It may @@ -348,7 +383,11 @@ namespace ROCKSDB_NAMESPACE { std::string *output_path, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->GetAbsolutePath(db_path,options, output_path, dbg); + IOStatus stat; + //stat = this->_private_fs->GetAbsolutePath(db_path,options, output_path, dbg); + *output_path = db_path.substr(0,db_path.size()-1); + std::cout << "Abs Path : " << db_path <<" "<<*output_path << std::endl; + return stat; } // Get the amount of free disk space @@ -366,4 +405,4 @@ namespace ROCKSDB_NAMESPACE { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; return this->_private_fs->IsDirectory(path, options, is_dir, dgb); } -} \ No newline at end of file +} diff --git a/src/m45-rocksdb/DummyFSForward.h b/src/m45-rocksdb/DummyFSForward.h index 7f4282e..7f1b30e 100644 --- a/src/m45-rocksdb/DummyFSForward.h +++ b/src/m45-rocksdb/DummyFSForward.h @@ -27,8 +27,50 @@ SOFTWARE. #include "rocksdb/io_status.h" #include "rocksdb/file_system.h" #include "rocksdb/status.h" +#include +/* +class MYFS_File : class FSSequentialFile { + public: + MYFS_File(); + ~MYFS_File(); + IOStatus Read(); + IOStatus Write(); + IOStatus Close(); + private: + int fd; + int inode; + char *file; +}; +*/ namespace ROCKSDB_NAMESPACE { + /* + + class MYFS_SequentialFile : public FSSequentialFile{ + private: + std::string filename_; + FILE* file_; + int fd_; + bool use_direct_io_; + size_t logical_sector_size_; + + public: + MYFS_SequentialFile(const std::string& fname, int fd); + virtual ~MYFS_SequentialFile(); + + virtual IOStatus Read(size_t n, const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus PositionedRead(uint64_t offset, size_t n, + const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus Skip(uint64_t n) override; + //virtual IOStatus InvalidateCache(size_t offset, size_t length) override; + virtual bool use_direct_io() const override { return use_direct_io_; } + virtual size_t GetRequiredBufferAlignment() const override { + return logical_sector_size_; + } + }; +*/ class DummyFSForward : public FileSystem { public: // No copying allowed @@ -123,7 +165,8 @@ namespace ROCKSDB_NAMESPACE { IOStatus ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg); private: - std::string get_seq_id(); + struct user_zns_device *_zns_dev; + std::string get_seq_id(); std::shared_ptr _private_fs; std::atomic _seq_id{}; std::string _name; From dab15bb7a33421413791fcd5c5ae319942b90f95 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 10 Oct 2022 14:40:17 +0000 Subject: [PATCH 045/101] Bug fixes --- src/m45-rocksdb/S2FileSystem.cc | 648 ++++++++++++++++++++++---------- src/m45-rocksdb/S2FileSystem.h | 248 ++++++++---- 2 files changed, 621 insertions(+), 275 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index aa34c3f..e948579 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -28,168 +28,362 @@ SOFTWARE. #include #include -namespace ROCKSDB_NAMESPACE { - int LookupMap_HashFunction(void *data) { - return *((int*) data) / LOOKUP_MAP_SIZE; +namespace ROCKSDB_NAMESPACE +{ + int LookupMap_HashFunction(std::string id) + { + unsigned hashindex; + char *ptr = const_cast(id.c_str()); + for (hashindex = 0; *ptr != '\0'; ptr++) + hashindex = *ptr + STRINGENCODE * hashindex; + return hashindex % LOOKUP_MAP_SIZE; + } + + int LookupMap_Insert(MYFS *FSObj, std::string id, Inode *ptr) + { + int index = LookupMap_HashFunction(id); + + mapEntries *map = (mapEntries *)calloc(1, sizeof(mapEntries)); + map->id = id; + map->ptr = ptr; + map->chain = NULL; + + if (FSObj->LookupCache[index] == NULL) + FSObj->LookupCache[index] = map; + else + { + struct mapEntries *head; + head = FSObj->LookupCache[index]; + while (head->chain != NULL) + head = head->chain; + head->chain = map; + } + + return 0; } - int LookupMap_Lookup(MYFS *FSObj, std::string id, void *ptr) { + int LookupMap_Delete(MYFS *FSObj, std::string id) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head, *tmp; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (head->id == id) + { + if (tmp == NULL) + FSObj->LookupCache[index] = head->chain; + else + tmp->chain = head->chain; + free(head); + break; + } + head = head->chain; + } + return 0; } - int LookupMap_Insert(MYFS *FSObj, std::string id, void *ptr) { + int LookupMap_Lookup(MYFS *FSObj, std::string id, Inode **ptr) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (head->id == id) + break; + head = head->chain; + } + + if (head == NULL) + return -1; + *ptr = head->ptr; + return 0; } - int Load_From_NVM(uint64_t addr, void *buffer, uint64_t size) { - return 0; + int Load_From_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) + { + // Check the size if quantization of LBA + int err = zns_udevice_read(FSObj->zns, addr, buffer, size); + return err; } - int Store_To_NVM(uint64_t addr, void *buffer, uint64_t size) { - return 0; + int Store_To_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) + { + int err = zns_udevice_write(FSObj->zns, addr, buffer, size); + return err; } - uint32_t get_FreeInode(MYFS *FSObj) { - + uint32_t get_FreeInode(MYFS *FSObj) + { + uint32_t ptr = (FSObj->InodePtr + 1) % MAX_INODE_COUNT; + while (ptr != FSObj->InodePtr) + { + if (!FSObj->InodeBitMap[ptr]) + { + FSObj->InodePtr = ptr; + return ptr; + } + ptr = (ptr + 1) % MAX_INODE_COUNT; + } + return 0; } - uint64_t get_FreeDataBlock(MYFS *FSObj) { - + uint64_t get_FreeDataBlock(MYFS *FSObj) + { + uint64_t ptr = (FSObj->DataBlockPtr + 1) % FSObj->DataBlockCount; + while (ptr != FSObj->DataBlockPtr) + { + if (!FSObj->DataBitMap[ptr]) + { + FSObj->DataBlockPtr = ptr; + return (ptr + DATA_BLOCKS_OFFSET) * FSObj->LogicalBlockSize; + } + ptr = (ptr + 1) % FSObj->DataBlockCount; + } + return 0; } - //Trim till /../path in /../path/name - void Get_ParentPath(std::string path, std::string &parent) { - int index; - for(int i=path.size()-1; i>=0; i--) { - if (path[i]=='/') { - index = i; - break; - } - } - parent = path.substr(0,index); + void free_DataBlock(MYFS *FSObj, uint64_t addr) + { + int index = (addr / FSObj->LogicalBlockSize) - DATA_BLOCKS_OFFSET; + FSObj->DataBitMap[index] = false; } - //Trim /../path/name to name - void Get_EntityName(std::string path, std::string &entityName) { - int index; - for(int i=path.size()-1; i>=0; i--) { - if (path[i]=='/') { + // Trim till /../path in /../path/name + void Get_ParentPath(std::string path, std::string &parent) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { index = i; break; } } - parent = path.substr(index+1,path.size()); + // Trim if additional slash is present + if (path[index - 1] == '/') + index--; + + parent = path.substr(0, index); } - //Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool - //return value will be 0 if asked child is not present - uint32_t Load_Children(Inode *ptr, std::string entitiyName, std::vector *children, bool loadChildren) { - //Check no of children and load it - uint64_t childrens_count = ptr->FileSize; + // Trim /../path/name to name + void Get_EntityName(std::string path, std::string &entityName) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { + index = i; + break; + } + } + entityName = path.substr(index + 1, path.size()); + } + + // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool + // return value will be 0 if asked child is not present + uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren) + { + // Check no of children and load it + // FIXME: Logic for rename + uint64_t children_count = ptr->FileSize; + + MYFS_Dir *dir_ptr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); + for (int i = 0; i < children_count / 16; i++) + { + Load_From_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + for (int j = 0; j < 16; j++) + { + if (loadChildren) + children->push_back(dir_ptr->Entities[j].EntityName); + else + { + if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) + { + free(dir_ptr); + return dir_ptr->Entities[j].InodeNum; + } + } + } + } + + Load_From_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + for (int i = 0; i < children_count % 16; i++) + { + if (loadChildren) + children->push_back(dir_ptr->Entities[i].EntityName); + else + { + if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) + { + free(dir_ptr); + return dir_ptr->Entities[i].InodeNum; + } + } + } + free(dir_ptr); + return 0; } - //A recursive call to load inode of the given path to lookupmap - //Stores the inode ptr as well, returns 0 in success - int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr) { - if (path=="/tmp") { - ptr = FSObj->rootEntry; - return 0; - } - - //Check if path in lookupMap cache - int isPresent = LookupMap_Lookup(FSObj, path, ptr); - if(!isPresent) - return 0; + // A recursive call to load inode of the given path to lookupmap + // Stores the inode ptr as well, returns 0 in success + int Get_Path_Inode(MYFS *FSObj, std::string path, Inode **ptr) + { + if (path == "/tmp") + { + *ptr = FSObj->rootEntry; + std::cout << (*ptr)->EntityName << std::endl; + return 0; + } - //if not : Get_Path_Inode for parent dir - std::string parent; - Inode *parentInode; + std::cout << "Path to look for : " << path << std::endl; + // Check if path in lookupMap cache + int isPresent = LookupMap_Lookup(FSObj, path, ptr); + if (!isPresent) + return 0; - Get_ParentPath(path, parent); - isPresent = Get_Path_Inode(FSObj, parent, parentInode); - if(isPresent) - return -1; - //Read parent dir and get asked inode number - if(parentInode->FileSize == 0) - return -1; + // if not : Get_Path_Inode for parent dir + std::string parent; + Inode *parentInode; + Get_ParentPath(path, parent); + std::cout << "Parent path : " << parent << std::endl; + isPresent = Get_Path_Inode(FSObj, parent, &parentInode); + if (isPresent) + return -1; + // Read parent dir and get asked inode number + if (parentInode->FileSize == 0) + return -1; - //Get Entity to search for - std::string entityName; - Get_EntityName(path, entityName); - uint32_t index = Load_Children(parentInode, entityName, NULL, false); - if (index) - return -1; + // Get Entity to search for + std::string entityName; + Get_EntityName(path, entityName); + uint32_t index = Load_Children(FSObj, parentInode, entityName, NULL, false); + if (index) + return -1; - //Load the children index inode from disk and store in lookupMap; - uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; - ptr = (Inode *) calloc(1, sizeof(Inode)); - isPresent = Load_From_NVM(address, ptr, (uint64_t) INODE_SIZE); - if (isPresent) - return -1; + // Load the children index inode from disk and store in lookupMap; + uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; + ptr = (Inode **)calloc(1, sizeof(Inode)); + isPresent = Load_From_NVM(FSObj, address, ptr, (uint64_t)INODE_SIZE); + if (isPresent) + return -1; - //Put it in lookup Map - LookupMap_Insert(FSObj, path, ptr); + // Put it in lookup Map + LookupMap_Insert(FSObj, path, *ptr); - return 0; + return 0; } + int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) + { + // FIXME: Logic for deletion and rename - int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode) { Inode *ptr; - int isPresent = Get_Path_Inode(FSObj, Ppath, ptr); - ptr->FileSize += 1; - //FIXME : Get the dir update logic here - return 0; + int isPresent = Get_Path_Inode(FSObj, Ppath, &ptr); + if (isPresent) + return -1; + + MYFS_DirData dirDataptr; + strcpy(dirDataptr.EntityName, childName.c_str()); + dirDataptr.InodeNum = childInode; + + MYFS_Dir *dirPtr; + dirPtr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); + int index = (++ptr->FileSize) / 16; + uint64_t addr = ptr->Direct_data_lbas[index]; + + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + ptr->Direct_data_lbas[index] = addr; + } + else + { + index = Load_From_NVM(FSObj, addr, dirPtr, 4096); + if (index) + return -1; + } + + index = ptr->FileSize % 16; + dirPtr->Entities[index - 1] = dirDataptr; + Store_To_NVM(FSObj, addr, dirPtr, 4096); + free(dirPtr); + + return 0; } + void MYFS_DeletePath(MYFS *FSObj, std::string path) + { + Inode *ptr; + int isPresent = Get_Path_Inode(FSObj, path, &ptr); + if (isPresent) + return; + // TODO: Handle logic if dir + // Free data block of inode as well! + + // Update Parent + std::string ppath; + Get_ParentPath(path, ppath); + // Delete from lookup map + } - int MYFS_CreateFile(MYFS *FSObj, std::string path) { + int MYFS_CreateFile(MYFS *FSObj, std::string path) + { uint32_t inode_no = get_FreeInode(FSObj); - Inode *ptr = (Inode *) calloc(1, sizeof(Inode)); - //Fill the ptr + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); + // Fill the ptr std::string entityName; Get_EntityName(path, entityName); - strcpy(ptr->EntityName,entityName.c_str()); + strcpy(ptr->EntityName, entityName.c_str()); - //Update parent - std::string parent; - Get_ParentPath(path, parent); - int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + // Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); if (parentUpdated) - return -1; + return -1; - //Load to lookupmap + // Load to lookupmap LookupMap_Insert(FSObj, path, ptr); - - return 0; + + return 0; } - int MYFS_CreateDir(MYFS *FSObj, std::string path) { + int MYFS_CreateDir(MYFS *FSObj, std::string path) + { uint32_t inode_no = get_FreeInode(FSObj); - Inode *ptr = (Inode *) calloc(1, sizeof(Inode)); + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); - //Fill the ptr + // Fill the ptr std::string entityName; Get_EntityName(path, entityName); - strcpy(ptr->EntityName,entityName.c_str()); + strcpy(ptr->EntityName, entityName.c_str()); ptr->IsDir = true; - //Update parent + // Update parent std::string parent; Get_ParentPath(path, parent); int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); if (parentUpdated) return -1; - //Load to lookupmap + // Load to lookupmap LookupMap_Insert(FSObj, path, ptr); - - return 0; - } + return 0; + } - S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) { + S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) + { FileSystem::Default(); std::string sdelimiter = ":"; std::string edelimiter = "://"; @@ -197,51 +391,57 @@ namespace ROCKSDB_NAMESPACE { struct zdev_init_params params; std::string device = uri_db_path.substr(uri_db_path.find(sdelimiter) + sdelimiter.size(), uri_db_path.find(edelimiter) - - (uri_db_path.find(sdelimiter) + sdelimiter.size())); - //make sure to setup these parameters properly and check the forced reset flag for M5 + (uri_db_path.find(sdelimiter) + sdelimiter.size())); + // make sure to setup these parameters properly and check the forced reset flag for M5 params.name = strdup(device.c_str()); params.log_zones = 3; params.gc_wmark = 1; params.force_reset = true; int ret = init_ss_zns_device(¶ms, &this->_zns_dev); - if(ret != 0){ + if (ret != 0) + { std::cout << "Error: " << uri_db_path << " failed to open the device " << device.c_str() << "\n"; std::cout << "Error: ret " << ret << "\n"; } - assert (ret == 0); + assert(ret == 0); assert(this->_zns_dev->lba_size_bytes != 0); assert(this->_zns_dev->capacity_bytes != 0); ss_dprintf(DBG_FS_1, "device %s is opened and initialized, reported LBA size is %u and capacity %lu \n", device.c_str(), this->_zns_dev->lba_size_bytes, this->_zns_dev->capacity_bytes); - - - //INIT File System - //TODO: In case of persistency; Read following data from Super block - //Init Bitmaps from disk - if (debug) - std::cout<<"Init MYFS"<FileSystemObj = (MYFS *) calloc(1, sizeof(MYFS)); - this->FileSystemObj->FileSystemCapacity = this->_zns_dev->capacity_bytes; - this->FileSystemObj->LogicalBlockSize = this->_zns_dev->lba_size_bytes; - //We reserve a single block as super block and MAX_INODE_COUNT as - this->FileSystemObj->DataBlockCount = (this->FileSystemObj->FileSystemCapacity / this->FileSystemObj->LogicalBlockSize - - (MAX_INODE_COUNT + 1)); - if (debug) - std::cout<<"File System params : "<FileSystemObj->FileSystemCapacity<<" "<< - this->FileSystemObj->LogicalBlockSize<<" "<FileSystemObj->DataBlockCount<FileSystemObj->DataBitMap = (bool*) calloc(this->FileSystemObj->DataBlockCount, sizeof(bool)); - - //Init root inode - //TODO: In case of persistency check if already present in disk - //FIXME: Get root dir name dynamically - strcpy(this->FileSystemObj->rootEntry->EntityName,"tmp"); - this->FileSystemObj->rootEntry->IsDir = true; - this->FileSystemObj->rootEntry->FileSize = 0; - } - - S2FileSystem::~S2FileSystem() { + + // INIT File System + // TODO: In case of persistency; Read following data from Super block + // Init Bitmaps from disk + if (debug) + std::cout << "Init MYFS" << std::endl; + this->FileSystemObj = (MYFS *)calloc(1, sizeof(MYFS)); + this->FileSystemObj->zns = this->_zns_dev; + this->FileSystemObj->FileSystemCapacity = this->_zns_dev->capacity_bytes; + this->FileSystemObj->LogicalBlockSize = this->_zns_dev->lba_size_bytes; + // We reserve a single block as super block and MAX_INODE_COUNT as + this->FileSystemObj->DataBlockCount = (this->FileSystemObj->FileSystemCapacity / this->FileSystemObj->LogicalBlockSize - (MAX_INODE_COUNT + 1)); + if (debug) + std::cout << "File System params : " << this->FileSystemObj->FileSystemCapacity << " " << this->FileSystemObj->LogicalBlockSize << " " << this->FileSystemObj->DataBlockCount << std::endl; + + // Init Data blocks bitmap + // this->FileSystemObj->LookupCache = (mapEntries *) calloc(LOOKUP_MAP_SIZE, sizeof(mapEntries)); + this->FileSystemObj->DataBitMap = (bool *)calloc(this->FileSystemObj->DataBlockCount, sizeof(bool)); + + // Init root inode + // TODO: In case of persistency check if already present in disk + // FIXME: Get root dir name dynamically + this->FileSystemObj->DataBlockPtr = 0; // Reserved for Root Node + this->FileSystemObj->InodePtr = 0; + this->FileSystemObj->InodeBitMap[0] = true; + this->FileSystemObj->rootEntry = (Inode *)calloc(1, sizeof(Inode)); + strcpy(this->FileSystemObj->rootEntry->EntityName, "tmp"); + this->FileSystemObj->rootEntry->IsDir = true; + this->FileSystemObj->rootEntry->FileSize = 0; + this->FileSystemObj->rootEntry->Direct_data_lbas[0] = DATA_BLOCKS_OFFSET * this->FileSystemObj->LogicalBlockSize; + } + + S2FileSystem::~S2FileSystem() + { } // Create a brand new sequentially-readable file with the specified name. @@ -251,11 +451,13 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) { + IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } @@ -267,11 +469,13 @@ namespace ROCKSDB_NAMESPACE { // // The returned file may be concurrently accessed by multiple threads. IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } - const char *S2FileSystem::Name() const { + const char *S2FileSystem::Name() const + { return "S2FileSytem"; } @@ -283,21 +487,25 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { + IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::NewRandomRWFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { + IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) { + IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) + { return IOStatus::IOError(__FUNCTION__); } @@ -310,86 +518,105 @@ namespace ROCKSDB_NAMESPACE { // returns non-OK. IOStatus S2FileSystem::NewDirectory(const std::string &name, const IOOptions &io_opts, std::unique_ptr *result, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + std::cout<<"New Directory "<FileSystemObj, dirname, ptr); - if (isPresent) - isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); - else - return IOStatus::IOError(__FUNCTION__); + int isPresent = Get_Path_Inode(this->FileSystemObj, dirname, &ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); + else + return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); + return IOStatus::OK(); } // Creates directory if missing. Return Ok if it exists, or successful in // Creating. - IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, dirname, ptr); - if (isPresent) - isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); + IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { + Inode *ptr; + std::cout << "If dir missing : " << dirname << std::endl; + std::string dir = dirname.substr(0, dirname.size() - 1); + int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); + std::cout << "After check : " << std::endl; + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dir); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus - S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { + S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) + { Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, ptr); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - else - *file_size = ptr->FileSize; - return IOStatus::OK(); + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + else + *file_size = ptr->FileSize; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetFileModificationTime(const std::string &fname, const IOOptions &options, uint64_t *file_mtime, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, - IODebugContext *dbg) { + IODebugContext *dbg) + { *output_path = db_path; - return IOStatus::OK(); + std::cout << "Get Abs path" << std::endl; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { + MYFS_DeletePath(this->FileSystemObj, fname); + return IOStatus::OK(); } IOStatus S2FileSystem::NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) { + IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. - IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) + { + return IOStatus::OK(); } // Lock the specified file. Used to prevent concurrent access to @@ -406,30 +633,40 @@ namespace ROCKSDB_NAMESPACE { // to go away. // // May create the named file if it does not already exist. - IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) + { + return IOStatus::OK(); } IOStatus - S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) { + S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { + IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) { + IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::RenameFile(const std::string &src, const std::string &target, const IOOptions &options, - IODebugContext *dbg) { + IODebugContext *dbg) + { + // MYFS_DeletePath(this->FileSystemObj, target); + // FIXME: Logic for rename + // Change name in Inode + // Change in parent return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetChildrenFileAttributes(const std::string &dir, const IOOptions &options, - std::vector *result, IODebugContext *dbg) { + std::vector *result, IODebugContext *dbg) + { return FileSystem::GetChildrenFileAttributes(dir, options, result, dbg); } @@ -441,12 +678,15 @@ namespace ROCKSDB_NAMESPACE { // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, - IODebugContext *dbg) { + IODebugContext *dbg) + { Inode *ptr; - int err = Get_Path_Inode(this->FileSystemObj, dir, ptr); - if (err) - return IOStatus::IOError(__FUNCTION__); - Load_Children(ptr, "", result, true); + int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + uint32_t err = Load_Children(this->FileSystemObj, ptr, "", result, true); + if (!err) + return IOStatus::IOError(__FUNCTION__); return IOStatus::OK(); } @@ -455,17 +695,41 @@ namespace ROCKSDB_NAMESPACE { // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered - IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { + IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, ptr); + std::cout << "File Exists : " << fname << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); if (isPresent) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus S2FileSystem::ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } + + // MYFS File + // class MYFS_File + // { + // private: + // struct Inode *ptr; + // uint64_t curr_offset; + // MYFS *FSObj; + + // public: + // MYFS_File(std::string filePath) + // { + // } + // ~MYFS_File(); + // int Read(uint64_t size, char *data); + // int PRead(uint64_t offset, uint64_t size, char *data); + // int Seek(uint64_t offset); + // int Truncate(uint64_t size); + // int Append(uint64_t size, char *data); + // int PAppend(uint64_t offset, uint64_t size, char *data); + // }; } diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index d68eac0..50e5b93 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -31,121 +31,203 @@ SOFTWARE. #include #include - #define LOOKUP_MAP_SIZE 1000 #define MAX_INODE_COUNT 255 #define INODE_SIZE 4096 #define SUPER_BLOCK_SIZE 4096 -namespace ROCKSDB_NAMESPACE { - - struct mapEntries { - char *id; - void *ptr; - mapEntries *chain; +#define STRINGENCODE 31 +#define DATA_BLOCKS_OFFSET 256 +namespace ROCKSDB_NAMESPACE +{ + + struct Inode + { + char EntityName[239]; + bool IsDir; + uint64_t FileSize; + uint64_t Indirect_ptr_lbas; + uint64_t Direct_data_lbas[320]; }; - struct Inode { - char EntityName[239]; - bool IsDir; - uint64_t FileSize; - uint64_t Indirect_ptr_lbas; - uint64_t Direct_data_lbas[320]; + struct mapEntries + { + std::string id; + Inode *ptr; + mapEntries *chain; }; - struct Indirect_ptr { - uint64_t Direct_data_lbas[511]; - uint64_t Indirect_ptr_lbas; + struct Indirect_ptr + { + uint64_t Direct_data_lbas[511]; + uint64_t Indirect_ptr_lbas; }; - struct dir_data { + struct MYFS_DirData + { char EntityName[252]; uint32_t InodeNum; }; - struct Dir { - dir_data Entities[16]; - }; - - struct MYFS { - mapEntries *LookupCache[LOOKUP_MAP_SIZE]; //Map type to void ptrs; - bool InodeBitMap[MAX_INODE_COUNT]; - bool *DataBitMap; - uint64_t DataBlockCount; - uint64_t FileSystemCapacity; - uint32_t LogicalBlockSize; - Inode *rootEntry; + struct MYFS_Dir + { + MYFS_DirData Entities[16]; }; + struct MYFS + { + mapEntries *LookupCache[LOOKUP_MAP_SIZE]; // Map type to void ptrs; + bool InodeBitMap[MAX_INODE_COUNT]; + bool *DataBitMap; + uint32_t InodePtr; + + uint64_t DataBlockPtr; + uint64_t DataBlockMax; + + uint64_t DataBlockCount; + uint64_t FileSystemCapacity; + uint32_t LogicalBlockSize; + Inode *rootEntry; + user_zns_device *zns; + }; - int Load_From_NVM(uint64_t address, void *ptr, uint64_t size); - int Store_To_NVM(); - int Read_User_Data(); + int Load_From_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); + int Store_To_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); void Get_ParentPath(std::string path, std::string &parent); void Get_EntityName(std::string path, std::string &entityName); void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); - int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); + // int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); int LookupMap_HashFunction(void *data); - class MYFS_File { - private: - char *fileName; - bool created; - struct Inode *ptr; - void *curr_data_ptr; - MYFS *FSObj; - public: - MYFS_File(); - ~MYFS_File(); - int Read(); - int Write(); - int Close(); + class MYFS_File + { + private: + struct Inode *ptr; + uint64_t curr_offset; + MYFS *FSObj; + + public: + MYFS_File(std::string filePath); + ~MYFS_File(); + int Read(uint64_t size, char *data); + int PRead(uint64_t offset, uint64_t size, char *data); + int Seek(uint64_t offset); + int Truncate(uint64_t size); + int Append(uint64_t size, char *data); + int PAppend(uint64_t offset, uint64_t size, char *data); }; - /* *Creates read only MYFS_File object */ - class MYFS_SequentialFile : public FSSequentialFile { - private: - MYFS_File fp; - uint64_t buffer_alignment_size; - public: - MYFS_SequentialFile(const std::string& fname, MYFS *FSObj); - virtual ~MYFS_SequentialFile(); - virtual IOStatus Read(size_t n,const IOOptions& opts, Slice* result, - char* scratch, IODebugContext* dbg) override; - virtual IOStatus PositionedRead(uint64_t offset, size_t n, - const IOOptions& opts, Slice* result, - char* scratch, IODebugContext* dbg) override; - virtual IOStatus Skip(uint64_t n) override; - virtual IOStatus InvalidateCache(size_t offset, size_t length) override { - return IOStatus::OK(); - }; - virtual bool use_direct_io() const override { return false; } - virtual size_t GetRequiredBufferAlignment() const override { - return buffer_alignment_size; - } - }; - - class MYFS_RandomAccessFile : public FSRandomAccessFile { - - }; + class MYFS_SequentialFile : public FSSequentialFile + { + private: + MYFS_File fp; - class MYFS_WritableFile : public FSWritableFile { - + public: + MYFS_SequentialFile(const std::string &fname, MYFS *FSObj); + virtual ~MYFS_SequentialFile(); + virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, + char *scratch, IODebugContext *dbg) override{}; + virtual IOStatus PositionedRead(uint64_t offset, size_t n, + const IOOptions &opts, Slice *result, + char *scratch, IODebugContext *dbg) override; + virtual IOStatus Skip(uint64_t n) override; + virtual IOStatus InvalidateCache(size_t offset, size_t length) override + { + return IOStatus::OK(); + }; + virtual bool use_direct_io() const override { return false; } + virtual size_t GetRequiredBufferAlignment() const override { return 4096; } }; - class MYFS_Directory : public FSDirectory { - + class MYFS_RandomAccessFile : public FSRandomAccessFile + { + private: + MYFS_File fp; + + public: + MYFS_RandomAccessFile(const std::string &fnmae, MYFS *FSObj); + virtual ~MYFS_RandomAccessFile(); + virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, + Slice *result, char *scratch, + IODebugContext *dbg) const override; + + virtual IOStatus MultiRead(FSReadRequest *reqs, size_t num_reqs, + const IOOptions &options, + IODebugContext *dbg) override; + + virtual IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions &opts, + IODebugContext *dbg) override; + + virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); }; + virtual bool use_direct_io() const override { return false; } + virtual size_t GetRequiredBufferAlignment() const override { return 4096; } }; + class MYFS_WritableFile : public FSWritableFile + { + private: + MYFS_File fp; + public: + virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) override; + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + const DataVerificationInfo & /* verification_info */, + IODebugContext *dbg) override + { + return Append(data, opts, dbg); + } + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, const DataVerificationInfo & /* verification_info */, + IODebugContext *dbg) override + { + return PositionedAppend(data, offset, opts, dbg); + } + virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Fsync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual bool IsSyncThreadSafe() const { return false; } + virtual bool use_direct_io() const override { return false; } + virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override; + virtual uint64_t GetFileSize(const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); } + virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + }; + + class MYFS_Directory : public FSDirectory + { + /* + public: + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { + return IOStatus::OK(); + } + + virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override { + return IOStatus::OK(); + } + + virtual IOStatus FsyncWithDirOptions(const IOOptions&, IODebugContext*, + const DirFsyncOptions& dir_fsync_options) override { + return IOStatus::OK(); + } + */ + }; - class S2FileSystem : public FileSystem { + class S2FileSystem : public FileSystem + { public: // No copying allowed S2FileSystem(std::string uri, bool debug); - S2FileSystem(const S2FileSystem&) = delete; + S2FileSystem(const S2FileSystem &) = delete; virtual ~S2FileSystem(); IOStatus IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) override; @@ -199,9 +281,9 @@ namespace ROCKSDB_NAMESPACE { GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg); - IOStatus DeleteFile(const std::string& fname, - const IOOptions& options, - IODebugContext* dbg); + IOStatus DeleteFile(const std::string &fname, + const IOOptions &options, + IODebugContext *dbg); IOStatus NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, @@ -239,8 +321,8 @@ namespace ROCKSDB_NAMESPACE { struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; - struct MYFS *FileSystemObj; + struct MYFS *FileSystemObj; }; } -#endif //STOSYS_PROJECT_S2FILESYSTEM_H +#endif // STOSYS_PROJECT_S2FILESYSTEM_H From f668211cfd6f0bc681091e3b3aed3ebfbe85ab75 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Tue, 11 Oct 2022 11:39:23 +0000 Subject: [PATCH 046/101] Updates to FS --- src/m45-rocksdb/S2FileSystem.cc | 139 +++++++++++++++++++++++++------- src/m45-rocksdb/S2FileSystem.h | 18 +++-- 2 files changed, 121 insertions(+), 36 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index e948579..0265a0b 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -241,11 +241,9 @@ namespace ROCKSDB_NAMESPACE if (path == "/tmp") { *ptr = FSObj->rootEntry; - std::cout << (*ptr)->EntityName << std::endl; return 0; } - std::cout << "Path to look for : " << path << std::endl; // Check if path in lookupMap cache int isPresent = LookupMap_Lookup(FSObj, path, ptr); if (!isPresent) @@ -255,7 +253,6 @@ namespace ROCKSDB_NAMESPACE std::string parent; Inode *parentInode; Get_ParentPath(path, parent); - std::cout << "Parent path : " << parent << std::endl; isPresent = Get_Path_Inode(FSObj, parent, &parentInode); if (isPresent) return -1; @@ -320,7 +317,7 @@ namespace ROCKSDB_NAMESPACE return 0; } - + /* void MYFS_DeletePath(MYFS *FSObj, std::string path) { Inode *ptr; @@ -335,7 +332,7 @@ namespace ROCKSDB_NAMESPACE Get_ParentPath(path, ppath); // Delete from lookup map } - + */ int MYFS_CreateFile(MYFS *FSObj, std::string path) { uint32_t inode_no = get_FreeInode(FSObj); @@ -489,18 +486,21 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { + std::cout<<"Writable file"< *, IODebugContext *) { + std::cout<<"Writable file"< *, IODebugContext *) { + std::cout<<"RWWritable file"< *result, IODebugContext *dbg) { - std::cout<<"New Directory "<FileSystemObj, dir, &ptr); - std::cout << "After check : " << std::endl; + std::cout << std::endl << std::endl; if (isPresent) isPresent = MYFS_CreateDir(this->FileSystemObj, dir); if (isPresent) @@ -596,14 +596,15 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - MYFS_DeletePath(this->FileSystemObj, fname); + //MYFS_DeletePath(this->FileSystemObj, fname); return IOStatus::OK(); } IOStatus S2FileSystem::NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::cout<<"Logger \n"< *addressess, bool forWrite) { + + } + + + //MYFS_File definition + MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) + { + this->FSObj = FSObj; + Get_Path_Inode(FSObj, filePath, &(this->ptr)); + this->curr_read_offset = 0; + } + + int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) + { + if(ptr->FileSize < offset+size) + return -1; + + std::vector *addresses_to_read; + uint64_t addr = get_blocks_addr(this->FSObj, this->ptr, offset, size, addresses_to_read, false); + if(!addr) + return -1; + char *readD = (char *) calloc(addresses_to_read->size(), 4096); + + for(int i=0;isize();i++) + Load_From_NVM(this->FSObj, addresses_to_read->at(i),readD+(i*4096), 4096); + + int smargin = offset % 4096; + memcpy(data, readD+smargin, size); + free(readD); + return 0; + } + + + int MYFS_File::Read(uint64_t size, char *data) + { + //Check with file size + int err = this->PRead(this->curr_read_offset, size, data); + if (err) + return err; + this->curr_read_offset += size; + return 0; + } + + + int MYFS_File::Seek(uint64_t offset) + { + if(ptr->FileSize < offset) + return -1; + this->curr_read_offset = offset; + return 0; + } + + int MYFS_File::Truncate(uint64_t size) + { + //TODO: Free Data Block + this->ptr->FileSize = size; + return 0; + } + + int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) { + std::vector *addresses_to_read; + uint64_t addr = get_blocks_addr(this->FSObj, this->ptr, offset, size, addresses_to_read, false); + if(!addr) + return -1; + + //Do read-modify-update cycle if smargin is present on 1st address. + int smargin = offset % 4096; + char *buffer = (char *) calloc(addresses_to_read->size(), 4096); + if (smargin) + Load_From_NVM(this->FSObj, addresses_to_read->at(0),buffer, 4096); + + memcpy(buffer+smargin, data, size); + for(int i=0; isize(); i++) + Store_To_NVM(this->FSObj, addresses_to_read->at(i), data+(i*4096), 4096); + + //Update file size + this->ptr->FileSize = offset + size; + free(buffer); + } + + int MYFS_File::Append(uint64_t size, char *data) { + return this->PAppend(ptr->FileSize, size, data); + } + + int MYFS_File::Close() { + //Flush Inode changes to Disk + } + + + /* + //Def of MYFS_SequentialFile + MYFS_SequentialFile::MYFS_SequentialFile(std::string fpath, MYFS *FSObj) { + //this->fp = MYFS_File(fpath, FSObj); + } + */ + } diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index 50e5b93..b5a4d5b 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -90,23 +90,26 @@ namespace ROCKSDB_NAMESPACE user_zns_device *zns; }; + /* int Load_From_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); int Store_To_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); void Get_ParentPath(std::string path, std::string &parent); void Get_EntityName(std::string path, std::string &entityName); - void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); + //void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); // int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); int LookupMap_HashFunction(void *data); - + */ + + class MYFS_File { private: struct Inode *ptr; - uint64_t curr_offset; MYFS *FSObj; + uint64_t curr_read_offset; public: - MYFS_File(std::string filePath); + MYFS_File(std::string filePath, MYFS *FSObj); ~MYFS_File(); int Read(uint64_t size, char *data); int PRead(uint64_t offset, uint64_t size, char *data); @@ -114,6 +117,7 @@ namespace ROCKSDB_NAMESPACE int Truncate(uint64_t size); int Append(uint64_t size, char *data); int PAppend(uint64_t offset, uint64_t size, char *data); + int Close(); }; /* @@ -122,10 +126,10 @@ namespace ROCKSDB_NAMESPACE class MYFS_SequentialFile : public FSSequentialFile { private: - MYFS_File fp; + MYFS_File *fp; public: - MYFS_SequentialFile(const std::string &fname, MYFS *FSObj); + MYFS_SequentialFile(std::string filePath, MYFS *FSObj); virtual ~MYFS_SequentialFile(); virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) override{}; @@ -147,7 +151,7 @@ namespace ROCKSDB_NAMESPACE MYFS_File fp; public: - MYFS_RandomAccessFile(const std::string &fnmae, MYFS *FSObj); + MYFS_RandomAccessFile(const std::string &fname, MYFS *FSObj); virtual ~MYFS_RandomAccessFile(); virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, From b1315108a2bffd468d0e30187429e1c199d59a0d Mon Sep 17 00:00:00 2001 From: yssamtu Date: Wed, 12 Oct 2022 08:20:45 +0000 Subject: [PATCH 047/101] w/r multiple lba while gc merging --- src/m23-ftl/zns_device.cpp | 709 ++++++++++++++++--------------------- 1 file changed, 304 insertions(+), 405 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 90f8087..5399cf1 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -35,30 +35,31 @@ extern "C" { // zone in zns struct zone_info { - unsigned long long zone_saddr; + unsigned long long saddr; uint32_t num_valid_pages; uint32_t write_ptr; pthread_mutex_t num_valid_pages_lock; pthread_mutex_t write_ptr_lock; - zone_info *next; // linked in free_zones and used_log_zones_list + zone_info *next; // linked in free_zones and used_log_zones }; // page map for log zones struct page_map { - uint64_t logical_page_addr; + unsigned long long page_addr; unsigned long long physical_addr; - zone_info *page_zone_info; + zone_info *zone; page_map *next; // page map for each logical block }; // logical block contains data in log zone (page map) and data in data zone (block map) struct logical_block { - uint64_t logical_block_saddr; + unsigned long long s_page_addr; page_map *page_maps; // page mapping for this logical block (log zone) page_map *old_page_maps; - zone_info *block_map; // block mapping for this logical block (data zone) + page_map *page_maps_tail; + zone_info *data_zone; // block mapping for this logical block (data zone) //TODO: LOCK the access - pthread_mutex_t logical_block_lock; + pthread_mutex_t lock; }; struct zns_info { @@ -70,56 +71,45 @@ struct zns_info { // Query the nsid for following info int fd; unsigned nsid; - uint32_t zns_page_size; - uint32_t zns_num_zones; - uint32_t zone_num_pages; + uint32_t page_size; + uint32_t num_zones; uint32_t num_data_zones; + uint32_t zone_num_pages; uint32_t mdts; // max data transfer size (read limit) uint32_t zasl; // zone append size limit (append limit) // Log zones zone_info *curr_log_zone; int num_used_log_zones; - zone_info *used_log_zones_list; - zone_info *used_log_zones_list_tail; + zone_info *used_log_zones; + zone_info *used_log_zones_tail; // Free zones uint32_t num_free_zones; - zone_info *free_zones_list; - zone_info *free_zones_list_tail; - pthread_mutex_t zones_list_lock; + zone_info *free_zones; + zone_info *free_zones_tail; + pthread_mutex_t zones_lock; // logical block corresponding to each data zone logical_block *logical_blocks; }; -static inline void increase_zone_num_valid_page(zone_info *zone, - uint32_t num_pages); -static inline void decrease_zone_num_valid_page(zone_info *zone, - uint32_t num_pages); -static inline void increase_zone_write_ptr(zone_info *zone, - uint32_t num_pages); -static inline void decrease_zone_write_ptr(zone_info *zone, - uint32_t num_pages); -static inline uint32_t get_block_index(uint32_t key, uint32_t base); -static inline uint32_t get_data_offset(uint32_t key, uint32_t base); +static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); +static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages); +static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages); +static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages); +static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages); +static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); static void change_log_zone(zns_info *info); -static bool look_up_map(page_map *maps, uint64_t logical_page_addr, - unsigned long long *physical_addr); -static void update_map(zns_info *info, uint64_t logical_page_addr, - unsigned long long physical_addr); -static int read_from_zns(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size); -static int append_to_data_zone(zns_info *info, unsigned long long saddr, - void *buffer, uint32_t size); -static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, - void *buffer, uint32_t size); -static void merge(zns_info *info, logical_block *block, zone_info *new_zone); +static void update_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr); +static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size); +static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, uint32_t size); +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void *buffer, uint32_t size); +static int read_logical_block(zns_info *info, logical_block *block, void *buffer); +static void merge(zns_info *info, logical_block *block); static void *garbage_collection(void *info_ptr); -int init_ss_zns_device(struct zdev_init_params *params, - struct user_zns_device **my_dev) +int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { - //Assign the private ptr to zns_info - *my_dev = (user_zns_device *)calloc(1, sizeof(user_zns_device)); - (*my_dev)->_private = calloc(1, sizeof(zns_info)); + *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); + (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; // set num_log_zones info->num_log_zones = params->log_zones; @@ -139,236 +129,184 @@ int init_ss_zns_device(struct zdev_init_params *params, } // reset device if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, - NVME_ZNS_ZSA_RESET, 0U, NULL); + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, NVME_ZNS_ZSA_RESET, 0U, NULL); if (ret) { printf("Zone reset failed %d\n", ret); return ret; } } - // set zns_lba_size or zns_page_size : Its same for now! + // set zns_lba_size or page_size : Its same for now! nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); if (ret) { printf("Failed to retrieve the nvme identify namespace %d\n", ret); return ret; } - (*my_dev)->tparams.zns_lba_size = 1 << ns.lbaf[ns.flbas & 0xF].ds; - (*my_dev)->lba_size_bytes = (*my_dev)->tparams.zns_lba_size; - info->zns_page_size = (*my_dev)->tparams.zns_lba_size; - // set zns_num_zones + info->page_size = 1U << ns.lbaf[ns.flbas & 0xF].ds; + (*my_dev)->tparams.zns_lba_size = info->page_size; + (*my_dev)->lba_size_bytes = info->page_size; + // set num_zones nvme_zone_report zns_report; - ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - false, sizeof(zns_report), &zns_report); + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, sizeof(zns_report), &zns_report); if (ret) { printf("Failed to report zones, ret %d\n", ret); return ret; } - (*my_dev)->tparams.zns_num_zones = le64_to_cpu(zns_report.nr_zones); - info->zns_num_zones = (*my_dev)->tparams.zns_num_zones; - // set num_data_zones = zns_num_zones - num_log_zones - info->num_data_zones = info->zns_num_zones - info->num_log_zones; + info->num_zones = le64_to_cpu(zns_report.nr_zones); + (*my_dev)->tparams.zns_num_zones = info->num_zones; + // set num_data_zones = num_zones - num_log_zones + info->num_data_zones = info->num_zones - info->num_log_zones; // set zone_num_pages nvme_zns_id_ns data; nvme_zns_identify_ns(info->fd, info->nsid, &data); info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * - (*my_dev)->tparams.zns_lba_size; + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * info->page_size; // set user capacity bytes = #data_zones * zone_capacity - (*my_dev)->capacity_bytes = (info->num_data_zones) * - (*my_dev)->tparams.zns_zone_capacity; + (*my_dev)->capacity_bytes = (info->num_data_zones) * (*my_dev)->tparams.zns_zone_capacity; // set max_data_transfer_size - struct nvme_id_ctrl ctrl; - nvme_identify_ctrl(info->fd, &ctrl); + nvme_id_ctrl id0; + nvme_identify_ctrl(info->fd, &id0); void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); if (errno) { printf("Failed to mmap\n"); return errno; } - info->mdts = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + ctrl.mdts)) * - (*my_dev)->lba_size_bytes; + info->mdts = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id0.mdts - 1U)) * info->page_size; // set zone_append_size_limit - struct nvme_zns_id_ctrl id; - nvme_zns_identify_ctrl(info->fd, &id); - info->zasl = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id.zasl)) * - (*my_dev)->lba_size_bytes; + nvme_zns_id_ctrl id1; + nvme_zns_identify_ctrl(info->fd, &id1); + info->zasl = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id1.zasl - 1U)) * info->page_size; munmap(regs, getpagesize()); if (errno) { printf("Failed to munmap\n"); return errno; } - // init zones_list_lock - pthread_mutex_init(&info->zones_list_lock, NULL); - // set all zone index to free_zones_list - info->free_zones_list = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail = info->free_zones_list; - pthread_mutex_init(&info->free_zones_list->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_list->write_ptr_lock, NULL); - for (uint32_t i = 1; i < info->zns_num_zones; ++i) { - info->free_zones_list_tail->next = (zone_info *)calloc(1, sizeof(zone_info)); - info->free_zones_list_tail = info->free_zones_list_tail->next; - info->free_zones_list_tail->zone_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_list_tail->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_list_tail->write_ptr_lock, NULL); + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); + // set all zone index to free_zones + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); + for (uint32_t i = 1U; i < info->num_zones; ++i) { + info->free_zones_tail->next = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones_tail->next; + info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); } // set num_free_zones - info->num_free_zones = info->zns_num_zones; + info->num_free_zones = info->num_zones; //Set current log zone to 0th zone - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; - if (!info->free_zones_list) - info->free_zones_list_tail = NULL; + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; info->curr_log_zone->next = NULL; - info->curr_log_zone->num_valid_pages = 0U; --info->num_free_zones; // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, sizeof(logical_block)); for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - info->logical_blocks[i].logical_block_saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->logical_blocks[i].logical_block_lock, NULL); + info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } //Start GC info->run_gc = true; - pthread_create(&info->gc_thread, NULL, &garbage_collection, (void *)info); + pthread_create(&info->gc_thread, NULL, &garbage_collection, info); return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, - void *buffer, uint32_t size) +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; - uint32_t logical_page_addr = address / info->zns_page_size; - //FIXME: Proision for contiguos block read, but not written contiguous + unsigned long long page_addr = address / info->page_size; while (size) { - uint32_t index = get_block_index(logical_page_addr, - info->zone_num_pages); + uint32_t index = get_block_index(page_addr, info->zone_num_pages); + uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - uint32_t offset = get_data_offset(logical_page_addr, - info->zone_num_pages); - uint32_t curr_block_read_size = (info->zone_num_pages - offset) * - info->zns_page_size; + uint32_t curr_block_read_size = (info->zone_num_pages - offset) * info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - pthread_mutex_lock(&block->logical_block_lock); - if (block->block_map) { - uint32_t read_size = 0U; - while (read_size < curr_block_read_size) { - uint32_t curr_read_size = info->mdts; - if (curr_read_size > curr_block_read_size - read_size) - curr_read_size = curr_block_read_size - read_size; - read_from_zns(info, - block->block_map->zone_saddr + offset + - read_size / info->zns_page_size, - (char *)buffer + read_size, curr_read_size); - read_size += curr_read_size; - } + pthread_mutex_lock(&block->lock); + if (block->data_zone) { + uint32_t curr_read_size = block->data_zone->write_ptr * info->page_size; + if (curr_read_size > curr_block_read_size) + curr_read_size = curr_block_read_size; + read_from_zns(info, block->data_zone->saddr + offset, buffer, curr_read_size); } - unsigned long long prev_physical_addr = 0ULL; - unsigned long long curr_start_physical_addr = 0ULL; - uint32_t curr_read_offset = 0U; - uint32_t curr_read_size = 0U; - page_map *curr_page_map = block->page_maps ? block->page_maps : - block->old_page_maps; - for (uint32_t i = 0U; i < curr_block_read_size; - i += info->zns_page_size, ++logical_page_addr) { - unsigned long long physical_addr = 0ULL; - bool get_addr = look_up_map(curr_page_map, - logical_page_addr, &physical_addr); - if (get_addr) { - if (!curr_read_size) { - curr_start_physical_addr = physical_addr; - curr_read_offset = i; - } else if (physical_addr - prev_physical_addr != 1) { // if physical address are not continuous - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, - curr_read_size); - curr_start_physical_addr = physical_addr; - curr_read_offset = i; - curr_read_size = 0U; + page_map *curr = block->page_maps ? block->page_maps : block->old_page_maps; + while (curr && curr->page_addr < page_addr) + curr = curr->next; + unsigned long long max_page_addr = page_addr + curr_block_read_size / info->page_size - 1ULL; + if (curr && curr->page_addr <= max_page_addr) { + page_map *prev = curr; + page_map *start = curr; + curr = curr->next; + while (curr) { + if (curr->page_addr > max_page_addr) + break; + if (curr->page_addr - prev->page_addr != 1ULL || curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - page_addr) * info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + start = curr; } - curr_read_size += info->zns_page_size; - if (curr_read_size == info->mdts) { // if current read size is equal to mdts, then read data from zns - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, - curr_read_size); - curr_read_size = 0U; - } else { - prev_physical_addr = physical_addr; - } - } else if (curr_read_size) { // if physical address are not continuous - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, - curr_read_size); - curr_read_size = 0U; + prev = curr; + curr = curr->next; } + unsigned long long buff_offset = (start->page_addr - page_addr) * info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); } - if (curr_read_size) // read the rest of data - read_from_zns(info, curr_start_physical_addr, - (char *)buffer + curr_read_offset, curr_read_size); - pthread_mutex_unlock(&block->logical_block_lock); + pthread_mutex_unlock(&block->lock); + page_addr += curr_block_read_size / info->page_size; buffer = (char *)buffer + curr_block_read_size; size -= curr_block_read_size; } return errno; } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, - void *buffer, uint32_t size) +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; while (size) { - uint32_t index = get_block_index(address / info->zns_page_size, - info->zone_num_pages); + uint32_t index = get_block_index(address / info->page_size, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - uint32_t offset = get_data_offset(address / info->zns_page_size, - info->zone_num_pages); + uint32_t offset = get_data_offset(address / info->page_size, info->zone_num_pages); uint32_t curr_append_size = 0U; - pthread_mutex_lock(&block->logical_block_lock); + pthread_mutex_lock(&block->lock); // if can write to data zone directly - if (!block->old_page_maps && block->block_map && - block->block_map->write_ptr <= offset) { - if (block->block_map->write_ptr < offset) { + if (!block->old_page_maps && block->data_zone && block->data_zone->write_ptr <= offset) { + if (block->data_zone->write_ptr < offset) { // append null data until arrive offset - uint32_t null_size = (offset - block->block_map->write_ptr) * - info->zns_page_size; - char *null_buffer = (char *)calloc(null_size, sizeof(char)); - int ret = append_to_data_zone(info, block->block_map->zone_saddr, - null_buffer, null_size); - free(null_buffer); + uint32_t null_size = (offset - block->data_zone->write_ptr) * info->page_size; + char null_buffer[null_size]; + memset(null_buffer, 0, null_size); + int ret = append_to_data_zone(info, block->data_zone, null_buffer, null_size); if (ret) { - pthread_mutex_unlock(&block->logical_block_lock); + pthread_mutex_unlock(&block->lock); return ret; } - increase_zone_write_ptr(block->block_map, - offset - block->block_map->write_ptr); } - curr_append_size = (info->zone_num_pages - offset) * - info->zns_page_size; + curr_append_size = (info->zone_num_pages - offset) * info->page_size; if (curr_append_size > size) curr_append_size = size; - int ret = append_to_data_zone(info, block->block_map->zone_saddr, - buffer, curr_append_size); + int ret = append_to_data_zone(info, block->data_zone, buffer, curr_append_size); if (ret) { - pthread_mutex_unlock(&block->logical_block_lock); + pthread_mutex_unlock(&block->lock); return ret; } - increase_zone_write_ptr(block->block_map, - curr_append_size / info->zns_page_size); - pthread_mutex_unlock(&block->logical_block_lock); + pthread_mutex_unlock(&block->lock); } else { curr_append_size = size; - if (!block->old_page_maps && block->block_map) { - uint32_t diff_size = (block->block_map->write_ptr - offset) * - info->zns_page_size; + if (block->data_zone) { + uint32_t diff_size = (block->data_zone->write_ptr - offset) * info->page_size; if (curr_append_size > diff_size) curr_append_size = diff_size; } - pthread_mutex_unlock(&block->logical_block_lock); - int ret = append_to_log_zone(info, address / info->zns_page_size, - buffer, curr_append_size); + pthread_mutex_unlock(&block->lock); + int ret = append_to_log_zone(info, address / info->page_size, buffer, curr_append_size); if (ret) return ret; } @@ -394,24 +332,24 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) blocks[i].page_maps = blocks[i].page_maps->next; free(tmp); } - if (blocks[i].block_map) { - pthread_mutex_destroy(&blocks[i].block_map->num_valid_pages_lock); - pthread_mutex_destroy(&blocks[i].block_map->write_ptr_lock); - free(blocks[i].block_map); + if (blocks[i].data_zone) { + pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); + pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); + free(blocks[i].data_zone); } - pthread_mutex_destroy(&blocks[i].logical_block_lock); + pthread_mutex_destroy(&blocks[i].lock); } free(blocks); - while (info->used_log_zones_list) { - zone_info *tmp = info->used_log_zones_list; - info->used_log_zones_list = info->used_log_zones_list->next; + while (info->used_log_zones) { + zone_info *tmp = info->used_log_zones; + info->used_log_zones = info->used_log_zones->next; pthread_mutex_destroy(&tmp->num_valid_pages_lock); pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } - while (info->free_zones_list) { - zone_info *tmp = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; + while (info->free_zones) { + zone_info *tmp = info->free_zones; + info->free_zones = info->free_zones->next; pthread_mutex_destroy(&tmp->num_valid_pages_lock); pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); @@ -419,298 +357,267 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); free(info->curr_log_zone); - pthread_mutex_destroy(&info->zones_list_lock); + pthread_mutex_destroy(&info->zones_lock); free(info); free(my_dev); return 0; } -static inline void increase_zone_num_valid_page(zone_info *zone, - uint32_t num_pages) +static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages) { pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages += num_pages; pthread_mutex_unlock(&zone->num_valid_pages_lock); } -static inline void decrease_zone_num_valid_page(zone_info *zone, - uint32_t num_pages) +static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages) { pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages -= num_pages; pthread_mutex_unlock(&zone->num_valid_pages_lock); } -static inline void increase_zone_write_ptr(zone_info *zone, - uint32_t num_pages) +static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages) { pthread_mutex_lock(&zone->write_ptr_lock); zone->write_ptr += num_pages; pthread_mutex_unlock(&zone->write_ptr_lock); } -static inline void decrease_zone_write_ptr(zone_info *zone, - uint32_t num_pages) +static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages) { pthread_mutex_lock(&zone->write_ptr_lock); zone->write_ptr -= num_pages; pthread_mutex_unlock(&zone->write_ptr_lock); } -static inline uint32_t get_block_index(uint32_t key, uint32_t base) +static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages) { - return key / base; + return page_addr / zone_num_pages; } -static inline uint32_t get_data_offset(uint32_t key, uint32_t base) +static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages) { - return key % base; + return page_addr % zone_num_pages; } static void change_log_zone(zns_info *info) { - pthread_mutex_lock(&info->zones_list_lock); // Lock for changing used_log_zones_list and accessing free zones list; - if (info->used_log_zones_list) - info->used_log_zones_list_tail->next = info->curr_log_zone; + pthread_mutex_lock(&info->zones_lock); // Lock for changing used_log_zones and accessing free zones list; + if (info->used_log_zones) + info->used_log_zones_tail->next = info->curr_log_zone; else - info->used_log_zones_list = info->curr_log_zone; - info->used_log_zones_list_tail = info->curr_log_zone; + info->used_log_zones = info->curr_log_zone; + info->used_log_zones_tail = info->curr_log_zone; info->curr_log_zone = NULL; ++info->num_used_log_zones; - pthread_mutex_unlock(&info->zones_list_lock); + pthread_mutex_unlock(&info->zones_lock); while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; while (!info->curr_log_zone) { - pthread_mutex_lock(&info->zones_list_lock); - if (info->num_free_zones > 1) { - info->curr_log_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; + pthread_mutex_lock(&info->zones_lock); + if (info->num_free_zones) { + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; info->curr_log_zone->next = NULL; --info->num_free_zones; } - pthread_mutex_unlock(&info->zones_list_lock); + pthread_mutex_unlock(&info->zones_lock); } } -static bool look_up_map(page_map *maps, uint64_t logical_page_addr, - unsigned long long *physical_addr) +static void update_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr) { - //Lock the logical block - //Search in log zone - for (page_map *head = maps; head; head = head->next) { - if (head->logical_page_addr > logical_page_addr) - return false; - if (head->logical_page_addr == logical_page_addr) { - *physical_addr = head->physical_addr; - return true; - } - } - return false; -} - -static void update_map(zns_info *info, uint64_t logical_page_addr, - unsigned long long physical_addr) -{ - uint32_t index = get_block_index(logical_page_addr, info->zone_num_pages); + uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - increase_zone_num_valid_page(info->curr_log_zone, 1); - increase_zone_write_ptr(info->curr_log_zone, 1); //Lock for updating page map - pthread_mutex_lock(&block->logical_block_lock); + pthread_mutex_lock(&block->lock); if (!block->page_maps) { block->page_maps = (page_map *)calloc(1, sizeof(page_map)); - block->page_maps->logical_page_addr = logical_page_addr; + block->page_maps_tail = block->page_maps; + block->page_maps->page_addr = page_addr; block->page_maps->physical_addr = physical_addr; - block->page_maps->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); return; } - if (block->page_maps->logical_page_addr == logical_page_addr) { + if (block->page_maps->page_addr == page_addr) { //Update log counter - decrease_zone_num_valid_page(block->page_maps->page_zone_info, 1); + decrease_num_valid_page(block->page_maps->zone, 1U); block->page_maps->physical_addr = physical_addr; - block->page_maps->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); return; } - if (block->page_maps->logical_page_addr > logical_page_addr) { + if (block->page_maps->page_addr > page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = block->page_maps; block->page_maps = tmp; - tmp->logical_page_addr = logical_page_addr; + tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; - tmp->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + tmp->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); return; } page_map *ptr = block->page_maps; while (ptr->next) { - if (ptr->next->logical_page_addr == logical_page_addr) { + if (ptr->next->page_addr == page_addr) { //Update log counter - decrease_zone_num_valid_page(ptr->next->page_zone_info, 1); + decrease_num_valid_page(ptr->next->zone, 1U); ptr->next->physical_addr = physical_addr; - ptr->next->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); return; - } else if (ptr->next->logical_page_addr > logical_page_addr) { + } else if (ptr->next->page_addr > page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = ptr->next; ptr->next = tmp; - tmp->logical_page_addr = logical_page_addr; + tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; - tmp->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + tmp->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); return; } ptr = ptr->next; } ptr->next = (page_map *)calloc(1, sizeof(page_map)); - ptr->next->logical_page_addr = logical_page_addr; + block->page_maps_tail = ptr->next; + ptr->next->page_addr = page_addr; ptr->next->physical_addr = physical_addr; - ptr->next->page_zone_info = info->curr_log_zone; - pthread_mutex_unlock(&block->logical_block_lock); + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); } -static int read_from_zns(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size) +static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size) { - unsigned short num_pages = size / info->zns_page_size; - nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, - 0U, 0U, 0U, 0U, 0U, size, buffer, 0U, NULL); + while (size) { + unsigned curr_read_size = size < info->mdts ? size : info->mdts; + unsigned short num_pages = curr_read_size / info->page_size; + nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); + physical_addr += num_pages; + buffer = (char *)buffer + curr_read_size; + size -= curr_read_size; + } return errno; } -static int append_to_data_zone(zns_info *info, unsigned long long saddr, - void *buffer, uint32_t size) +static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, uint32_t size) { - uint32_t appended_size = 0U; - while (appended_size < size) { + increase_write_ptr(zone, size / info->page_size); + while (size) { unsigned long long physical_addr = 0ULL; uint32_t curr_append_size = info->zasl; - if (curr_append_size > size - appended_size) - curr_append_size = size - appended_size; - unsigned short num_curr_append_pages = curr_append_size / info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, saddr, num_curr_append_pages - 1, - 0U, 0U, 0U, 0U, curr_append_size, - (char *)buffer + appended_size, 0U, NULL, &physical_addr); + if (curr_append_size > size) + curr_append_size = size; + unsigned short num_curr_append_pages = curr_append_size / info->page_size; + nvme_zns_append(info->fd, info->nsid, zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); if (errno) return errno; - appended_size += curr_append_size; + buffer = (char *)buffer + curr_append_size; + size -= curr_append_size; } return errno; } -static int append_to_log_zone(zns_info *info, uint64_t logical_page_addr, - void *buffer, uint32_t size) +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void *buffer, uint32_t size) { - uint32_t appended_size = 0U; - while (appended_size < size) { - unsigned long long physical_addr = 0ULL; - bool need_to_change_log_zone = true; - uint32_t curr_append_size = (info->zone_num_pages - - info->curr_log_zone->write_ptr) * - info->zns_page_size; + while (size) { + bool change = true; + uint32_t curr_append_size = (info->zone_num_pages - info->curr_log_zone->write_ptr) * info->page_size; if (curr_append_size > info->zasl) { curr_append_size = info->zasl; - need_to_change_log_zone = false; + change = false; } - if (curr_append_size > size - appended_size) { - curr_append_size = size - appended_size; - need_to_change_log_zone = false; + if (curr_append_size > size) { + curr_append_size = size; + change = false; } - unsigned short num_curr_append_pages = curr_append_size / - info->zns_page_size; - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->zone_saddr, - num_curr_append_pages - 1, 0U, 0U, 0U, 0U, - curr_append_size, (char *)buffer + appended_size, - 0U, NULL, &physical_addr); + unsigned long long physical_addr = 0ULL; + unsigned short num_curr_append_pages = curr_append_size / info->page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); if (errno) return errno; - for (uint32_t i = 0U; i < num_curr_append_pages; - ++i, ++logical_page_addr, ++physical_addr) - update_map(info, logical_page_addr, physical_addr); - if (need_to_change_log_zone) + increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); + increase_write_ptr(info->curr_log_zone, num_curr_append_pages); + for (uint32_t i = 0U; i < num_curr_append_pages; ++i) + update_map(info, page_addr++, physical_addr++); + if (change) change_log_zone(info); - appended_size += curr_append_size; + buffer = (char *)buffer + curr_append_size; + size -= curr_append_size; } return errno; } -static void merge(zns_info *info, logical_block *block, zone_info *new_zone) +static int read_logical_block(zns_info *info, logical_block *block, void *buffer) { - pthread_mutex_lock(&block->logical_block_lock); + //FIXME: Proision for contiguos block read, but not written + if (block->data_zone) + read_from_zns(info, block->data_zone->saddr, buffer, block->data_zone->write_ptr * info->page_size); + page_map *prev = block->old_page_maps; + page_map *start = block->old_page_maps; + page_map *curr = block->old_page_maps->next; + decrease_num_valid_page(prev->zone, 1U); + while (curr) { + if (curr->page_addr - prev->page_addr != 1ULL || curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + start = curr; + } + decrease_num_valid_page(curr->zone, 1U); + prev = curr; + curr = curr->next; + } + unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + return errno; +} + +static void merge(zns_info *info, logical_block *block) +{ + pthread_mutex_lock(&block->lock); block->old_page_maps = block->page_maps; block->page_maps = NULL; - pthread_mutex_unlock(&block->logical_block_lock); - page_map *ptr = block->old_page_maps; - zone_info *old_used_data_zone = block->block_map; - uint32_t zone_append_page_limit = info->zasl / info->zns_page_size; - char * buffer = (char *)calloc(info->zasl, sizeof(char)); - for (uint32_t offset = 0U; offset < info->zone_num_pages; ++offset) { - unsigned long long page_physical_addr = 0ULL; - bool have_data = false; - bool still_have_data = false; - // if data in data zone - if (old_used_data_zone) { - have_data = true; - page_physical_addr = old_used_data_zone->zone_saddr + offset; - decrease_zone_write_ptr(old_used_data_zone, 1); - if (old_used_data_zone->write_ptr) - still_have_data = true; - } - // if data in log zone - if (ptr && - ptr->logical_page_addr == block->logical_block_saddr + offset) { - have_data = true; - page_physical_addr = ptr->physical_addr; - decrease_zone_num_valid_page(ptr->page_zone_info, 1); - ptr = ptr->next; - if (ptr) - still_have_data = true; - } - if (have_data) - read_from_zns(info, page_physical_addr, - buffer + (offset % zone_append_page_limit) * info->zns_page_size, - info->zns_page_size); - if (!still_have_data) { - append_to_data_zone(info, new_zone->zone_saddr, buffer, - (offset % zone_append_page_limit + 1) * info->zns_page_size); - increase_zone_write_ptr(new_zone, - offset % zone_append_page_limit + 1); - break; - } - if (offset % zone_append_page_limit == zone_append_page_limit - 1) { - append_to_data_zone(info, new_zone->zone_saddr, - buffer, info->zasl); - increase_zone_write_ptr(new_zone, zone_append_page_limit); - memset(buffer, 0, info->zasl); - } + pthread_mutex_unlock(&block->lock); + uint32_t size = get_data_offset(block->page_maps_tail->page_addr, info->zone_num_pages) + 1U; + if (block->data_zone && block->data_zone->write_ptr > size) + size = block->data_zone->write_ptr; + size *= info->page_size; + char buffer[size]; + memset(buffer, 0, size); + read_logical_block(info, block, buffer); + pthread_mutex_lock(&block->lock); + // Append old data zone to free zones list + if (block->data_zone) { + decrease_write_ptr(block->data_zone, block->data_zone->write_ptr); + nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_lock); + if (info->free_zones) + info->free_zones_tail->next = block->data_zone; + else + info->free_zones = block->data_zone; + info->free_zones_tail = block->data_zone; + ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); } - free(buffer); - pthread_mutex_lock(&block->logical_block_lock); + pthread_mutex_lock(&info->zones_lock); + // Get free zone and nullify the next + block->data_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; + block->data_zone->next = NULL; + --info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); + append_to_data_zone(info, block->data_zone, buffer, size); while (block->old_page_maps) { page_map *tmp = block->old_page_maps; block->old_page_maps = block->old_page_maps->next; free(tmp); } - block->block_map = new_zone; - pthread_mutex_unlock(&block->logical_block_lock); - // Append old data zone to free zones list - if (old_used_data_zone) { - decrease_zone_write_ptr(old_used_data_zone, - old_used_data_zone->write_ptr); - nvme_zns_mgmt_send(info->fd, info->nsid, - old_used_data_zone->zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0U, NULL); - pthread_mutex_lock(&info->zones_list_lock); - if (info->free_zones_list) - info->free_zones_list_tail->next = old_used_data_zone; - else - info->free_zones_list = old_used_data_zone; - info->free_zones_list_tail = old_used_data_zone; - ++info->num_free_zones; - pthread_mutex_unlock(&info->zones_list_lock); - } + pthread_mutex_unlock(&block->lock); } static void *garbage_collection(void *info_ptr) @@ -718,71 +625,63 @@ static void *garbage_collection(void *info_ptr) zns_info *info = (zns_info *)info_ptr; uint32_t index = 0U; while (info->run_gc) { - while (info->num_log_zones - info->num_used_log_zones > - info->gc_wmark) { + while (info->num_log_zones - info->num_used_log_zones > info->gc_wmark) { if (!info->run_gc) return NULL; } logical_block *block = &info->logical_blocks[index]; while(!block->page_maps) { - index = (index + 1) % info->num_data_zones; + index = (index + 1U) % info->num_data_zones; block = &info->logical_blocks[index]; if (!info->run_gc) return NULL; } - pthread_mutex_lock(&info->zones_list_lock); - // Get free zone and nullify the next - zone_info *free_zone = info->free_zones_list; - info->free_zones_list = info->free_zones_list->next; - if (!info->free_zones_list) - info->free_zones_list_tail = NULL; - free_zone->next = NULL; - --info->num_free_zones; - pthread_mutex_unlock(&info->zones_list_lock); if (!info->run_gc) return NULL; // Merge logical block to data zone - merge(info, block, free_zone); + merge(info, block); if (!info->run_gc) return NULL; - // Check used log zone valid counter if zero reset and add to free zone list - // Remove zone from used_log_zones_list if valid_page is zero and add that zone to free zones list - for (zone_info *prev = NULL, *free = NULL, - *curr = info->used_log_zones_list; info->run_gc && curr; ) { + // Check used log zone valid counter + // if zero reset and add to free zone list + // Remove zone from used_log_zones + // if valid_page is zero and add that zone to free zones list + zone_info *prev = NULL; + zone_info *free = NULL; + zone_info *curr = info->used_log_zones; + while (info->run_gc && curr) { if (!curr->num_valid_pages) { // reset - decrease_zone_write_ptr(curr, curr->write_ptr); - nvme_zns_mgmt_send(info->fd, info->nsid, - curr->zone_saddr, false, - NVME_ZNS_ZSA_RESET, 0U, NULL); - pthread_mutex_lock(&info->zones_list_lock); + decrease_write_ptr(curr, curr->write_ptr); + nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_lock); // Remove from used_log_zones free = curr; curr = curr->next; if (prev) { prev->next = curr; - if (free == info->used_log_zones_list_tail) - info->used_log_zones_list_tail = prev; + if (free == info->used_log_zones_tail) + info->used_log_zones_tail = prev; } else { - info->used_log_zones_list = curr; - if (!info->used_log_zones_list) - info->used_log_zones_list_tail = NULL; + info->used_log_zones = curr; + if (!info->used_log_zones) + info->used_log_zones_tail = NULL; } free->next = NULL; --info->num_used_log_zones; - if (info->free_zones_list) - info->free_zones_list_tail->next = free; + if (info->free_zones) + info->free_zones_tail->next = free; else - info->free_zones_list = free; - info->free_zones_list_tail = free; + info->free_zones = free; + info->free_zones_tail = free; ++info->num_free_zones; - pthread_mutex_unlock(&info->zones_list_lock); + pthread_mutex_unlock(&info->zones_lock); } else { prev = curr; curr = curr->next; } } - index = (index + 1) % info->num_data_zones; + index = (index + 1U) % info->num_data_zones; } return NULL; } From 6a6cfd986fce527031e99f65bfeed36ed58eafc3 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Wed, 12 Oct 2022 10:12:05 +0000 Subject: [PATCH 048/101] add load balancing --- src/m23-ftl/zns_device.cpp | 314 ++++++++++++++++++++++++++++--------- 1 file changed, 240 insertions(+), 74 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 5399cf1..15bef92 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -33,6 +33,15 @@ SOFTWARE. extern "C" { +enum { + user_read = 0x1, + gc_read = 0x2, + sb_read = user_read | gc_read, + user_write = 0x10, + gc_write = 0x20, + sb_write = user_write | gc_write +}; + // zone in zns struct zone_info { unsigned long long saddr; @@ -51,7 +60,7 @@ struct page_map { page_map *next; // page map for each logical block }; -// logical block contains data in log zone (page map) and data in data zone (block map) +// Contains data in log zone (page map) and data in data zone (block map) struct logical_block { unsigned long long s_page_addr; page_map *page_maps; // page mapping for this logical block (log zone) @@ -75,8 +84,12 @@ struct zns_info { uint32_t num_zones; uint32_t num_data_zones; uint32_t zone_num_pages; - uint32_t mdts; // max data transfer size (read limit) + uint32_t mdts; // max data transfer size (read + append limit) uint32_t zasl; // zone append size limit (append limit) + uint8_t used_status; + uint32_t free_transfer_size; + uint32_t free_append_size; + pthread_mutex_t size_limit_lock; // Log zones zone_info *curr_log_zone; int num_used_log_zones; @@ -86,7 +99,7 @@ struct zns_info { uint32_t num_free_zones; zone_info *free_zones; zone_info *free_zones_tail; - pthread_mutex_t zones_lock; + pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone // logical block corresponding to each data zone logical_block *logical_blocks; }; @@ -95,18 +108,28 @@ static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages); static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages); static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages); -static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages); -static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); +static inline uint32_t get_block_index(unsigned long long page_addr, + uint32_t zone_num_pages); +static inline uint32_t get_data_offset(unsigned long long page_addr, + uint32_t zone_num_pages); static void change_log_zone(zns_info *info); -static void update_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr); -static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size); -static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, uint32_t size); -static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void *buffer, uint32_t size); -static int read_logical_block(zns_info *info, logical_block *block, void *buffer); +static void update_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr); +static unsigned request_transfer_size(zns_info *info, uint8_t type); +static void free_transfer_size(zns_info *info, uint8_t type, unsigned size); +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size, uint8_t type); +static int append_to_data_zone(zns_info *info, zone_info *zone, + void *buffer, uint32_t size, uint8_t type); +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, + void *buffer, uint32_t size); +static int read_logical_block(zns_info *info, logical_block *block, + void *buffer); static void merge(zns_info *info, logical_block *block); static void *garbage_collection(void *info_ptr); -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) +int init_ss_zns_device(struct zdev_init_params *params, + struct user_zns_device **my_dev) { *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); @@ -129,7 +152,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * } // reset device if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, NVME_ZNS_ZSA_RESET, 0U, NULL); + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); if (ret) { printf("Zone reset failed %d\n", ret); return ret; @@ -147,7 +171,10 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * (*my_dev)->lba_size_bytes = info->page_size; // set num_zones nvme_zone_report zns_report; - ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, sizeof(zns_report), &zns_report); + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, + NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, false, + sizeof(zns_report), &zns_report); if (ret) { printf("Failed to report zones, ret %d\n", ret); return ret; @@ -161,9 +188,11 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * nvme_zns_identify_ns(info->fd, info->nsid, &data); info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * info->page_size; + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + info->page_size; // set user capacity bytes = #data_zones * zone_capacity - (*my_dev)->capacity_bytes = (info->num_data_zones) * (*my_dev)->tparams.zns_zone_capacity; + (*my_dev)->capacity_bytes = (info->num_data_zones) * + (*my_dev)->tparams.zns_zone_capacity; // set max_data_transfer_size nvme_id_ctrl id0; nvme_identify_ctrl(info->fd, &id0); @@ -172,16 +201,21 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * printf("Failed to mmap\n"); return errno; } - info->mdts = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id0.mdts - 1U)) * info->page_size; + info->mdts = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id0.mdts)) - + 2U) * info->page_size; // set zone_append_size_limit nvme_zns_id_ctrl id1; nvme_zns_identify_ctrl(info->fd, &id1); - info->zasl = (1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id1.zasl - 1U)) * info->page_size; + info->zasl = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id1.zasl)) - + 2U) * info->page_size; munmap(regs, getpagesize()); if (errno) { printf("Failed to munmap\n"); return errno; } + info->free_transfer_size = info->mdts; + info->free_append_size = info->zasl; + pthread_mutex_init(&info->size_limit_lock, NULL); // init zones_lock pthread_mutex_init(&info->zones_lock, NULL); // set all zone index to free_zones @@ -190,7 +224,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); for (uint32_t i = 1U; i < info->num_zones; ++i) { - info->free_zones_tail->next = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail->next = (zone_info *)calloc(1UL, + sizeof(zone_info)); info->free_zones_tail = info->free_zones_tail->next; info->free_zones_tail->saddr = i * info->zone_num_pages; pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); @@ -206,7 +241,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * info->curr_log_zone->next = NULL; --info->num_free_zones; // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, sizeof(logical_block)); + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; pthread_mutex_init(&info->logical_blocks[i].lock, NULL); @@ -217,7 +253,8 @@ int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device * return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) +int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; unsigned long long page_addr = address / info->page_size; @@ -225,20 +262,25 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buf uint32_t index = get_block_index(page_addr, info->zone_num_pages); uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - uint32_t curr_block_read_size = (info->zone_num_pages - offset) * info->page_size; + uint32_t curr_block_read_size = (info->zone_num_pages - offset) * + info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; pthread_mutex_lock(&block->lock); if (block->data_zone) { - uint32_t curr_read_size = block->data_zone->write_ptr * info->page_size; + uint32_t curr_read_size = block->data_zone->write_ptr * + info->page_size; if (curr_read_size > curr_block_read_size) curr_read_size = curr_block_read_size; - read_from_zns(info, block->data_zone->saddr + offset, buffer, curr_read_size); + read_from_zns(info, block->data_zone->saddr + offset, + buffer, curr_read_size, user_read); } - page_map *curr = block->page_maps ? block->page_maps : block->old_page_maps; + page_map *curr = block->page_maps ? block->page_maps : + block->old_page_maps; while (curr && curr->page_addr < page_addr) curr = curr->next; - unsigned long long max_page_addr = page_addr + curr_block_read_size / info->page_size - 1ULL; + unsigned long long max_page_addr = page_addr + curr_block_read_size / + info->page_size - 1ULL; if (curr && curr->page_addr <= max_page_addr) { page_map *prev = curr; page_map *start = curr; @@ -246,53 +288,76 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buf while (curr) { if (curr->page_addr > max_page_addr) break; - if (curr->page_addr - prev->page_addr != 1ULL || curr->physical_addr - prev->physical_addr != 1ULL) { - unsigned long long buff_offset = (start->page_addr - page_addr) * info->page_size; - uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; - read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + if (curr->page_addr - prev->page_addr != 1ULL || + curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - + page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - + start->page_addr + 1ULL) * + info->page_size; + read_from_zns(info, start->physical_addr, + (char *)buffer + buff_offset, curr_read_size, + user_read); start = curr; } prev = curr; curr = curr->next; } - unsigned long long buff_offset = (start->page_addr - page_addr) * info->page_size; - uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; - read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + unsigned long long buff_offset = (start->page_addr - page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, + (char *)buffer + buff_offset, curr_read_size, + user_read); } pthread_mutex_unlock(&block->lock); page_addr += curr_block_read_size / info->page_size; buffer = (char *)buffer + curr_block_read_size; size -= curr_block_read_size; } + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~user_read; + pthread_mutex_unlock(&info->size_limit_lock); return errno; } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) +int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; while (size) { - uint32_t index = get_block_index(address / info->page_size, info->zone_num_pages); + uint32_t index = get_block_index(address / info->page_size, + info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - uint32_t offset = get_data_offset(address / info->page_size, info->zone_num_pages); + uint32_t offset = get_data_offset(address / info->page_size, + info->zone_num_pages); uint32_t curr_append_size = 0U; pthread_mutex_lock(&block->lock); // if can write to data zone directly - if (!block->old_page_maps && block->data_zone && block->data_zone->write_ptr <= offset) { + if (!block->old_page_maps && + block->data_zone && block->data_zone->write_ptr <= offset) { if (block->data_zone->write_ptr < offset) { // append null data until arrive offset - uint32_t null_size = (offset - block->data_zone->write_ptr) * info->page_size; + uint32_t null_size = (offset - block->data_zone->write_ptr) * + info->page_size; char null_buffer[null_size]; memset(null_buffer, 0, null_size); - int ret = append_to_data_zone(info, block->data_zone, null_buffer, null_size); + int ret = append_to_data_zone(info, block->data_zone, + null_buffer, null_size, + user_write); if (ret) { pthread_mutex_unlock(&block->lock); return ret; } } - curr_append_size = (info->zone_num_pages - offset) * info->page_size; + curr_append_size = (info->zone_num_pages - offset) * + info->page_size; if (curr_append_size > size) curr_append_size = size; - int ret = append_to_data_zone(info, block->data_zone, buffer, curr_append_size); + int ret = append_to_data_zone(info, block->data_zone, + buffer, curr_append_size, user_write); if (ret) { pthread_mutex_unlock(&block->lock); return ret; @@ -301,12 +366,14 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *bu } else { curr_append_size = size; if (block->data_zone) { - uint32_t diff_size = (block->data_zone->write_ptr - offset) * info->page_size; + uint32_t diff_size = (block->data_zone->write_ptr - offset) * + info->page_size; if (curr_append_size > diff_size) curr_append_size = diff_size; } pthread_mutex_unlock(&block->lock); - int ret = append_to_log_zone(info, address / info->page_size, buffer, curr_append_size); + int ret = append_to_log_zone(info, address / info->page_size, + buffer, curr_append_size); if (ret) return ret; } @@ -314,7 +381,10 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *bu buffer = (char *)buffer + curr_append_size; size -= curr_append_size; } - return 0; + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~user_write; + pthread_mutex_unlock(&info->size_limit_lock); + return errno; } int deinit_ss_zns_device(struct user_zns_device *my_dev) @@ -357,6 +427,7 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); free(info->curr_log_zone); + pthread_mutex_destroy(&info->size_limit_lock); pthread_mutex_destroy(&info->zones_lock); free(info); free(my_dev); @@ -391,19 +462,21 @@ static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages) pthread_mutex_unlock(&zone->write_ptr_lock); } -static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages) +static inline uint32_t get_block_index(unsigned long long page_addr, + uint32_t zone_num_pages) { return page_addr / zone_num_pages; } -static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages) +static inline uint32_t get_data_offset(unsigned long long page_addr, + uint32_t zone_num_pages) { return page_addr % zone_num_pages; } static void change_log_zone(zns_info *info) { - pthread_mutex_lock(&info->zones_lock); // Lock for changing used_log_zones and accessing free zones list; + pthread_mutex_lock(&info->zones_lock); if (info->used_log_zones) info->used_log_zones_tail->next = info->curr_log_zone; else @@ -426,7 +499,8 @@ static void change_log_zone(zns_info *info) } } -static void update_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr) +static void update_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr) { uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; @@ -488,12 +562,68 @@ static void update_map(zns_info *info, unsigned long long page_addr, unsigned lo pthread_mutex_unlock(&block->lock); } -static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size) +static unsigned request_transfer_size(zns_info *info, uint8_t type) +{ + if (type & sb_read) { + uint32_t max_transfer_size = info->mdts; + for (;;) { + if (info->free_transfer_size) { + pthread_mutex_lock(&info->size_limit_lock); + break; + } + } + if (info->used_status & sb_write) + max_transfer_size -= info->zasl; + if (info->used_status & (sb_read & ~type)) + max_transfer_size >>= 1; + if (info->free_transfer_size < max_transfer_size) + max_transfer_size = info->free_transfer_size; + info->free_transfer_size -= max_transfer_size; + info->used_status |= type; + pthread_mutex_unlock(&info->size_limit_lock); + return max_transfer_size; + } else { + uint32_t max_transfer_size = info->zasl; + for (;;) { + if (info->free_transfer_size && info->free_append_size) { + pthread_mutex_lock(&info->size_limit_lock); + break; + } + } + if (info->used_status & sb_write) + max_transfer_size >>= 1; + if (info->free_append_size < max_transfer_size) + max_transfer_size = info->free_append_size; + if (info->free_transfer_size < max_transfer_size) + max_transfer_size = info->free_transfer_size; + info->free_transfer_size -= max_transfer_size; + info->free_append_size -= max_transfer_size; + info->used_status |= type; + pthread_mutex_unlock(&info->size_limit_lock); + return max_transfer_size; + } +} + +static void free_transfer_size(zns_info *info, uint8_t type, unsigned size) +{ + pthread_mutex_lock(&info->size_limit_lock); + if (type & sb_write) + info->free_append_size += size; + info->free_transfer_size += size; + pthread_mutex_unlock(&info->size_limit_lock); +} + +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint32_t size, uint8_t type) { while (size) { - unsigned curr_read_size = size < info->mdts ? size : info->mdts; + unsigned curr_transfer_size = request_transfer_size(info, type); + unsigned curr_read_size = size < curr_transfer_size ? + size : curr_transfer_size; unsigned short num_pages = curr_read_size / info->page_size; - nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); + nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, + 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); + free_transfer_size(info, type, curr_transfer_size); physical_addr += num_pages; buffer = (char *)buffer + curr_read_size; size -= curr_read_size; @@ -501,16 +631,22 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, void return errno; } -static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, uint32_t size) +static int append_to_data_zone(zns_info *info, zone_info *zone, + void *buffer, uint32_t size, uint8_t type) { increase_write_ptr(zone, size / info->page_size); while (size) { unsigned long long physical_addr = 0ULL; - uint32_t curr_append_size = info->zasl; + unsigned curr_transfer_size = request_transfer_size(info, type); + unsigned curr_append_size = curr_transfer_size; if (curr_append_size > size) curr_append_size = size; - unsigned short num_curr_append_pages = curr_append_size / info->page_size; - nvme_zns_append(info->fd, info->nsid, zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); + unsigned short num_curr_append_pages = curr_append_size / + info->page_size; + nvme_zns_append(info->fd, info->nsid, zone->saddr, + num_curr_append_pages - 1, 0U, 0U, 0U, 0U, + curr_append_size, buffer, 0U, NULL, &physical_addr); + free_transfer_size(info, type, curr_transfer_size); if (errno) return errno; buffer = (char *)buffer + curr_append_size; @@ -519,13 +655,17 @@ static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, ui return errno; } -static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void *buffer, uint32_t size) +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, + void *buffer, uint32_t size) { while (size) { bool change = true; - uint32_t curr_append_size = (info->zone_num_pages - info->curr_log_zone->write_ptr) * info->page_size; - if (curr_append_size > info->zasl) { - curr_append_size = info->zasl; + unsigned curr_transfer_size = request_transfer_size(info, user_write); + unsigned curr_append_size = (info->zone_num_pages - + info->curr_log_zone->write_ptr) * + info->page_size; + if (curr_append_size > curr_transfer_size) { + curr_append_size = curr_transfer_size; change = false; } if (curr_append_size > size) { @@ -533,8 +673,12 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void change = false; } unsigned long long physical_addr = 0ULL; - unsigned short num_curr_append_pages = curr_append_size / info->page_size; - nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); + unsigned short num_curr_append_pages = curr_append_size / + info->page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, + num_curr_append_pages - 1, 0U, 0U, 0U, 0U, + curr_append_size, buffer, 0U, NULL, &physical_addr); + free_transfer_size(info, user_write, curr_transfer_size); if (errno) return errno; increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); @@ -549,29 +693,41 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void return errno; } -static int read_logical_block(zns_info *info, logical_block *block, void *buffer) +static int read_logical_block(zns_info *info, logical_block *block, + void *buffer) { //FIXME: Proision for contiguos block read, but not written if (block->data_zone) - read_from_zns(info, block->data_zone->saddr, buffer, block->data_zone->write_ptr * info->page_size); + read_from_zns(info, block->data_zone->saddr, + buffer, block->data_zone->write_ptr * info->page_size, + gc_read); page_map *prev = block->old_page_maps; page_map *start = block->old_page_maps; page_map *curr = block->old_page_maps->next; decrease_num_valid_page(prev->zone, 1U); while (curr) { - if (curr->page_addr - prev->page_addr != 1ULL || curr->physical_addr - prev->physical_addr != 1ULL) { - unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * info->page_size; - uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; - read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + if (curr->page_addr - prev->page_addr != 1ULL || + curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - + block->s_page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, + (char *)buffer + buff_offset, curr_read_size, + gc_read); start = curr; } decrease_num_valid_page(curr->zone, 1U); prev = curr; curr = curr->next; } - unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * info->page_size; - uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; - read_from_zns(info, start->physical_addr, (char *)buffer + buff_offset, curr_read_size); + unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * + info->page_size; + read_from_zns(info, start->physical_addr, + (char *)buffer + buff_offset, curr_read_size, gc_read); return errno; } @@ -581,18 +737,23 @@ static void merge(zns_info *info, logical_block *block) block->old_page_maps = block->page_maps; block->page_maps = NULL; pthread_mutex_unlock(&block->lock); - uint32_t size = get_data_offset(block->page_maps_tail->page_addr, info->zone_num_pages) + 1U; + uint32_t size = get_data_offset(block->page_maps_tail->page_addr, + info->zone_num_pages) + 1U; if (block->data_zone && block->data_zone->write_ptr > size) size = block->data_zone->write_ptr; size *= info->page_size; char buffer[size]; memset(buffer, 0, size); read_logical_block(info, block, buffer); + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~gc_read; + pthread_mutex_unlock(&info->size_limit_lock); pthread_mutex_lock(&block->lock); // Append old data zone to free zones list if (block->data_zone) { decrease_write_ptr(block->data_zone, block->data_zone->write_ptr); - nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, + false, NVME_ZNS_ZSA_RESET, 0U, NULL); pthread_mutex_lock(&info->zones_lock); if (info->free_zones) info->free_zones_tail->next = block->data_zone; @@ -611,7 +772,10 @@ static void merge(zns_info *info, logical_block *block) block->data_zone->next = NULL; --info->num_free_zones; pthread_mutex_unlock(&info->zones_lock); - append_to_data_zone(info, block->data_zone, buffer, size); + append_to_data_zone(info, block->data_zone, buffer, size, gc_write); + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~gc_write; + pthread_mutex_unlock(&info->size_limit_lock); while (block->old_page_maps) { page_map *tmp = block->old_page_maps; block->old_page_maps = block->old_page_maps->next; @@ -625,7 +789,8 @@ static void *garbage_collection(void *info_ptr) zns_info *info = (zns_info *)info_ptr; uint32_t index = 0U; while (info->run_gc) { - while (info->num_log_zones - info->num_used_log_zones > info->gc_wmark) { + while (info->num_log_zones - info->num_used_log_zones > + info->gc_wmark) { if (!info->run_gc) return NULL; } @@ -653,7 +818,8 @@ static void *garbage_collection(void *info_ptr) if (!curr->num_valid_pages) { // reset decrease_write_ptr(curr, curr->write_ptr); - nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, + false, NVME_ZNS_ZSA_RESET, 0U, NULL); pthread_mutex_lock(&info->zones_lock); // Remove from used_log_zones free = curr; From 991298dec8db0947a26797b06c5fea1a75a2b457 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Wed, 12 Oct 2022 18:01:24 +0000 Subject: [PATCH 049/101] Partial working code --- src/m45-rocksdb/S2FileSystem.cc | 458 +++++++++++++++++++++++++------- src/m45-rocksdb/S2FileSystem.h | 75 +++--- 2 files changed, 403 insertions(+), 130 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 0265a0b..e1421a7 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -65,7 +65,7 @@ namespace ROCKSDB_NAMESPACE int LookupMap_Delete(MYFS *FSObj, std::string id) { int index = LookupMap_HashFunction(id); - struct mapEntries *head, *tmp; + struct mapEntries *head, *tmp = NULL; head = FSObj->LookupCache[index]; while (head != NULL) @@ -79,6 +79,7 @@ namespace ROCKSDB_NAMESPACE free(head); break; } + tmp = head; head = head->chain; } @@ -190,10 +191,9 @@ namespace ROCKSDB_NAMESPACE // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool // return value will be 0 if asked child is not present - uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren) + uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren, std::string targetName = "") { // Check no of children and load it - // FIXME: Logic for rename uint64_t children_count = ptr->FileSize; MYFS_Dir *dir_ptr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); @@ -208,8 +208,18 @@ namespace ROCKSDB_NAMESPACE { if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) { - free(dir_ptr); - return dir_ptr->Entities[j].InodeNum; + if (targetName == "") + { + uint32_t ret = dir_ptr->Entities[j].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[j].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + return 0; + } } } } @@ -224,8 +234,18 @@ namespace ROCKSDB_NAMESPACE { if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) { - free(dir_ptr); - return dir_ptr->Entities[i].InodeNum; + if (targetName == NULL) + { + uint32_t ret = dir_ptr->Entities[i].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[i].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + return 0; + } } } } @@ -259,31 +279,38 @@ namespace ROCKSDB_NAMESPACE // Read parent dir and get asked inode number if (parentInode->FileSize == 0) return -1; - // Get Entity to search for std::string entityName; Get_EntityName(path, entityName); uint32_t index = Load_Children(FSObj, parentInode, entityName, NULL, false); - if (index) + if (!index) return -1; // Load the children index inode from disk and store in lookupMap; uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; ptr = (Inode **)calloc(1, sizeof(Inode)); isPresent = Load_From_NVM(FSObj, address, ptr, (uint64_t)INODE_SIZE); - if (isPresent) + if (!isPresent) return -1; // Put it in lookup Map LookupMap_Insert(FSObj, path, *ptr); - + std::cout << entityName << std::endl; return 0; } - int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) + int Rename_Child_In_Parent(MYFS *FSObj, std::string Ppath, std::string targetName, std::string srcName) { - // FIXME: Logic for deletion and rename + // FIXME: Logic for rename + Inode *parentInode; + int isPresent = Get_Path_Inode(FSObj, Ppath, &parentInode); + uint32_t rename = Load_Children(FSObj, parentInode, srcName, NULL, false, targetName); + return rename; + } + int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) + { + // FIXME: Logic for deletion Inode *ptr; int isPresent = Get_Path_Inode(FSObj, Ppath, &ptr); if (isPresent) @@ -335,12 +362,14 @@ namespace ROCKSDB_NAMESPACE */ int MYFS_CreateFile(MYFS *FSObj, std::string path) { + std::cout << "File creation : " << path << std::endl; uint32_t inode_no = get_FreeInode(FSObj); Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); // Fill the ptr std::string entityName; Get_EntityName(path, entityName); strcpy(ptr->EntityName, entityName.c_str()); + ptr->Inode_no = inode_no; // Update parent std::string parent; @@ -365,6 +394,7 @@ namespace ROCKSDB_NAMESPACE Get_EntityName(path, entityName); strcpy(ptr->EntityName, entityName.c_str()); ptr->IsDir = true; + ptr->Inode_no = inode_no; // Update parent std::string parent; @@ -430,9 +460,11 @@ namespace ROCKSDB_NAMESPACE this->FileSystemObj->DataBlockPtr = 0; // Reserved for Root Node this->FileSystemObj->InodePtr = 0; this->FileSystemObj->InodeBitMap[0] = true; + *(this->FileSystemObj->DataBitMap) = true; this->FileSystemObj->rootEntry = (Inode *)calloc(1, sizeof(Inode)); strcpy(this->FileSystemObj->rootEntry->EntityName, "tmp"); this->FileSystemObj->rootEntry->IsDir = true; + this->FileSystemObj->rootEntry->Inode_no = 0; this->FileSystemObj->rootEntry->FileSize = 0; this->FileSystemObj->rootEntry->Direct_data_lbas[0] = DATA_BLOCKS_OFFSET * this->FileSystemObj->LogicalBlockSize; } @@ -450,7 +482,15 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); + std::cout << "Seq File to access : " << fname << " " << isPresent << std::endl; + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_SequentialFile(fname, this->FileSystemObj)); + return IOStatus::OK(); } IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) @@ -468,7 +508,15 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); + std::cout << "Random file to access : " << fname << " " << isPresent << std::endl; + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_RandomAccessFile(fname, this->FileSystemObj)); + return IOStatus::OK(); } const char *S2FileSystem::Name() const @@ -486,21 +534,30 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { - std::cout<<"Writable file"<FileSystemObj, fname, &ptr); + std::cout << "fname : " << fname << " " << isPresent << std::endl; + if (isPresent) + MYFS_CreateFile(this->FileSystemObj, fname); + else + ptr->FileSize = 0; + + result->reset(); + result->reset(new MYFS_WritableFile(fname, this->FileSystemObj)); + return IOStatus::OK(); } IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, IODebugContext *) { - std::cout<<"Writable file"< *, IODebugContext *) { - std::cout<<"RWWritable file"< *result, IODebugContext *dbg) { - std::cout<<"New Directory : "<FileSystemObj, dir, &ptr); - std::cout << std::endl << std::endl; + std::cout << std::endl + << std::endl; if (isPresent) isPresent = MYFS_CreateDir(this->FileSystemObj, dir); if (isPresent) @@ -596,15 +654,16 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - //MYFS_DeletePath(this->FileSystemObj, fname); + // MYFS_DeletePath(this->FileSystemObj, fname); return IOStatus::OK(); } IOStatus S2FileSystem::NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, IODebugContext *dbg) { - std::cout<<"Logger \n"<FileSystemObj, target); // FIXME: Logic for rename // Change name in Inode // Change in parent - return IOStatus::IOError(__FUNCTION__); + + // verify if target exists + std::cout << "Rename file" << src << " " << target << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, target, &targetptr); + if (isPresent) + { + // if it is not present + // rename the inode + std::string entityName; + Get_EntityName(target, entityName); + Get_Path_Inode(this->FileSystemObj, src, &sourceptr); + strcpy(sourceptr->EntityName, entityName.c_str()); + LookupMap_Delete(this->FileSystemObj, src); + + LookupMap_Insert(this->FileSystemObj, target, sourceptr); + // rename the entity in the parent + std::string srcEntityName; + Get_EntityName(src, srcEntityName); + + std::string parentPath; + Get_ParentPath(target, parentPath); + int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, srcEntityName); + if (parentUpdated) + return IOStatus::IOError(__FUNCTION__); + } + else + { + } + return IOStatus::OK(); } IOStatus S2FileSystem::GetChildrenFileAttributes(const std::string &dir, const IOOptions &options, @@ -702,7 +790,7 @@ namespace ROCKSDB_NAMESPACE std::cout << "File Exists : " << fname << std::endl; int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); if (isPresent) - return IOStatus::IOError(__FUNCTION__); + return IOStatus::NotFound(); return IOStatus::OK(); } @@ -713,104 +801,280 @@ namespace ROCKSDB_NAMESPACE return IOStatus::IOError(__FUNCTION__); } + int load_nth_indirect_block(MYFS *FSObj, uint32_t n, uint64_t indirect_lba, Indirect_ptr **ptr) + { + for (int i = 0; i < n; i++) + Load_From_NVM(FSObj, (*ptr)->Indirect_ptr_lbas, *ptr, 4096); + } + int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) + { + uint32_t curr = offset / 4096, end = size / 4096; + uint64_t if_dirty_addr; + uint64_t *data_block_lba_ptr, next_indirect_block_addr; + uint32_t no_of_data_block_ptrs; + Indirect_ptr *iptr = NULL; + // Load the direct ptr + if (curr < 480) + { + // In Inode block itself + data_block_lba_ptr = ptr->Direct_data_lbas; + no_of_data_block_ptrs = 480; + next_indirect_block_addr = ptr->Indirect_ptr_lbas; + if_dirty_addr = 4096 + (ptr->Inode_no * INODE_SIZE); + } + else + { + curr -= 480; + int nth_indirect = curr / 510; + iptr = (Indirect_ptr *)calloc(1, 4096); + Load_From_NVM(FSObj, ptr->Indirect_ptr_lbas, iptr, 4096); + for (int i = 0; i < nth_indirect; i++) + Load_From_NVM(FSObj, iptr->Indirect_ptr_lbas, iptr, 4096); + + data_block_lba_ptr = iptr->Direct_data_lbas; + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + curr = curr % 510; + if_dirty_addr = iptr->Current_addr; + } + uint64_t addr; + for (int i = 0; i <= end; i++) + { + addr = *(data_block_lba_ptr + curr); + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + *data_block_lba_ptr = addr; + } + addresses->push_back(addr); + curr++; - uint64_t get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addressess, bool forWrite) { - - } - - - //MYFS_File definition - MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) - { - this->FSObj = FSObj; - Get_Path_Inode(FSObj, filePath, &(this->ptr)); - this->curr_read_offset = 0; - } + if (curr == no_of_data_block_ptrs) + { + if (!next_indirect_block_addr) + { + // If no indirect block ptr, create one and store to mem + next_indirect_block_addr = get_FreeDataBlock(FSObj); + if (iptr == NULL) + { + ptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + } + else + { + iptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + } + iptr = {0}; + iptr->Current_addr = next_indirect_block_addr; + } + else + { + Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); + } + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + data_block_lba_ptr = iptr->Direct_data_lbas; + curr = 0; + } + } - int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) - { - if(ptr->FileSize < offset+size) - return -1; + // Store dirty block to NVM + if (iptr == NULL) + { + // addresses->push_back(); + Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + } + else + { + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + } - std::vector *addresses_to_read; - uint64_t addr = get_blocks_addr(this->FSObj, this->ptr, offset, size, addresses_to_read, false); - if(!addr) - return -1; - char *readD = (char *) calloc(addresses_to_read->size(), 4096); + free(iptr); + return 0; + } - for(int i=0;isize();i++) - Load_From_NVM(this->FSObj, addresses_to_read->at(i),readD+(i*4096), 4096); + // MYFS_File definition + MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) + { + this->FSObj = FSObj; + Get_Path_Inode(FSObj, filePath, &(this->ptr)); + this->curr_read_offset = 0; + } - int smargin = offset % 4096; - memcpy(data, readD+smargin, size); - free(readD); - return 0; - } + int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) + { + std::cout<<"Read on : "<ptr->EntityName<<" "<ptr->FileSize<<" "<FileSize < offset + size) { + if(offset >= ptr->FileSize) + return 0; + size = ptr->FileSize - offset; + } + + std::vector addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) + return -1; + + char *readD = (char *)calloc(addresses_to_read.size(), 4096); + for (int i = 0; i < addresses_to_read.size(); i++) + Load_From_NVM(this->FSObj, addresses_to_read.at(i), readD + (i * 4096), 4096); + int smargin = offset % 4096; + memcpy(data, readD + smargin, size); + free(readD); + return size; + } - int MYFS_File::Read(uint64_t size, char *data) - { - //Check with file size - int err = this->PRead(this->curr_read_offset, size, data); - if (err) - return err; + int MYFS_File::Read(uint64_t size, char *data) + { + // Check with file size + int sizeW = this->PRead(this->curr_read_offset, size, data); this->curr_read_offset += size; + return sizeW; + } + + int MYFS_File::Seek(uint64_t offset) + { + if (ptr->FileSize < this->curr_read_offset + offset) + return -1; + this->curr_read_offset += offset; return 0; - } - - - int MYFS_File::Seek(uint64_t offset) - { - if(ptr->FileSize < offset) - return -1; - this->curr_read_offset = offset; - return 0; - } - - int MYFS_File::Truncate(uint64_t size) - { - //TODO: Free Data Block + } + + int MYFS_File::Truncate(uint64_t size) + { + // TODO: Free Data Block this->ptr->FileSize = size; return 0; - } + } - int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) { - std::vector *addresses_to_read; - uint64_t addr = get_blocks_addr(this->FSObj, this->ptr, offset, size, addresses_to_read, false); - if(!addr) + int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) + { + std::cout<<"Append on : "<ptr->EntityName<<" "< addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) return -1; - - //Do read-modify-update cycle if smargin is present on 1st address. + + // Do read-modify-update cycle if smargin is present on 1st address. int smargin = offset % 4096; - char *buffer = (char *) calloc(addresses_to_read->size(), 4096); - if (smargin) - Load_From_NVM(this->FSObj, addresses_to_read->at(0),buffer, 4096); + char *buffer = (char *)calloc(addresses_to_read.size(), 4096); + if (smargin) + Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); - memcpy(buffer+smargin, data, size); - for(int i=0; isize(); i++) - Store_To_NVM(this->FSObj, addresses_to_read->at(i), data+(i*4096), 4096); + memcpy(buffer + smargin, data, size); + for (int i = 0; i < addresses_to_read.size(); i++) + Store_To_NVM(this->FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); - //Update file size + // Update file size this->ptr->FileSize = offset + size; free(buffer); - } + return 0; + } - int MYFS_File::Append(uint64_t size, char *data) { + int MYFS_File::Append(uint64_t size, char *data) + { return this->PAppend(ptr->FileSize, size, data); } - int MYFS_File::Close() { - //Flush Inode changes to Disk + uint64_t MYFS_File::GetFileSize() + { + return this->ptr->FileSize; + } + + int MYFS_File::Close() + { + // Flush Inode changes to Disk + } + + // Def of MYFS_SequentialFile + MYFS_SequentialFile::MYFS_SequentialFile(std::string fpath, MYFS *FSObj) + { + this->fp = new MYFS_File(fpath, FSObj); + } + + IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) + { + + int sizeW = this->fp->Read(n, scratch); + std::cout<<"Read done with : "<size()<fp->PRead(offset, n, scratch); + // if (err) + // return IOStatus::IOError(__FUNCTION__); + // *result = Slice(scratch, n); + // return IOStatus::OK(); + // } + + IOStatus MYFS_SequentialFile::Skip(uint64_t n) + { + int err = this->fp->Seek(n); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } - + // Def MYFS_RandomAccessFile + MYFS_RandomAccessFile::MYFS_RandomAccessFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + } + + IOStatus MYFS_RandomAccessFile::Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, + IODebugContext *dbg) const + { + std::cout<<"Read here"<fp->PRead(offset, n, scratch); + *result = Slice(scratch, sizeW); + return IOStatus::OK(); + } + + // Def MYFS_WritableFile + MYFS_WritableFile::MYFS_WritableFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + } + + IOStatus MYFS_WritableFile::Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) + { + int err = this->fp->Truncate(size); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) + { + char *block = (char *)data.data(); + uint64_t size = data.size(); + int err = this->fp->Append(size, block); + std::cout<fp = MYFS_File(fpath, FSObj); + IOStatus MYFS_WritableFile::PositionedAppend(const Slice &data, uint64_t offset, const IOOptions &opts, + IODebugContext *dbg) + { + + char *block = (char *)data.data(); + uint64_t size = data.size(); + int err = this->fp->PAppend(offset, size, block); + std::cout<<"PAppend size : "<fp;} virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, - char *scratch, IODebugContext *dbg) override{}; - virtual IOStatus PositionedRead(uint64_t offset, size_t n, - const IOOptions &opts, Slice *result, - char *scratch, IODebugContext *dbg) override; + char *scratch, IODebugContext *dbg)override; + virtual IOStatus Skip(uint64_t n) override; - virtual IOStatus InvalidateCache(size_t offset, size_t length) override - { - return IOStatus::OK(); - }; - virtual bool use_direct_io() const override { return false; } - virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + // virtual IOStatus PositionedRead(uint64_t offset, size_t n, + // const IOOptions &opts, Slice *result, + // char *scratch, IODebugContext *dbg) override; + // virtual IOStatus InvalidateCache(size_t offset, size_t length) override + // { + // return IOStatus::OK(); + // }; + // virtual bool use_direct_io() const override { return true; } + // virtual size_t GetRequiredBufferAlignment() const override { return 4096; } }; class MYFS_RandomAccessFile : public FSRandomAccessFile { private: - MYFS_File fp; + MYFS_File *fp; public: - MYFS_RandomAccessFile(const std::string &fname, MYFS *FSObj); - virtual ~MYFS_RandomAccessFile(); + MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); + virtual ~MYFS_RandomAccessFile(){delete this->fp;} virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, - Slice *result, char *scratch, - IODebugContext *dbg) const override; - + Slice *result, char *scratch, IODebugContext *dbg) const override; + /* virtual IOStatus MultiRead(FSReadRequest *reqs, size_t num_reqs, const IOOptions &options, - IODebugContext *dbg) override; + IODebugContext *dbg) {std::cout<<"MULTIREAD"<fp;} virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) override; - virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) override; + virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) {return IOStatus::OK();}; virtual IOStatus Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) override; + virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + /* virtual IOStatus Append(const Slice &data, const IOOptions &opts, - const DataVerificationInfo & /* verification_info */, + const DataVerificationInfo & /* verification_info , IODebugContext *dbg) override { return Append(data, opts, dbg); @@ -190,21 +199,21 @@ namespace ROCKSDB_NAMESPACE const IOOptions &opts, IODebugContext *dbg) override; virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, - const IOOptions &opts, const DataVerificationInfo & /* verification_info */, + const IOOptions &opts, const DataVerificationInfo & /* verification_info, IODebugContext *dbg) override { return PositionedAppend(data, offset, opts, dbg); } - virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } - virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Fsync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } virtual bool IsSyncThreadSafe() const { return false; } - virtual bool use_direct_io() const override { return false; } - virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override; + virtual bool use_direct_io() const override { return true; } + virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {} virtual uint64_t GetFileSize(const IOOptions &opts, - IODebugContext *dbg) override; + IODebugContext *dbg) override {std::cout<<"Calling this module"<fp->GetFileSize();} virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); } virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + */ }; class MYFS_Directory : public FSDirectory From 483914feaa652f7770e3ce927ce759d96deac039 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Wed, 12 Oct 2022 18:04:25 +0000 Subject: [PATCH 050/101] conflict resolved --- CMakeLists.txt | 2 +- src/m23-ftl/zns_device.cpp | 4 ++++ src/m45-rocksdb/rocks_s2fs.cc | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8879913..b38dd9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set(CMAKE_LIBRARY_PATH "/home/$ENV{USER}/local/lib/") set(CMAKE_INSTALL_PREFIX "/home/$ENV{USER}/local/") # Project configuration specific parameters -set(STOSYS_M45 OFF) +set(STOSYS_M45 ON) set(STOSYS_CMAKE_DEBUG OFF) set(STOSYS_ASAN ON) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 15bef92..43b28a1 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -262,6 +262,10 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, uint32_t index = get_block_index(page_addr, info->zone_num_pages); uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; + if (block == NULL) + return -1; + uint32_t offset = get_data_offset(logical_page_addr, + info->zone_num_pages); uint32_t curr_block_read_size = (info->zone_num_pages - offset) * info->page_size; if (curr_block_read_size > size) diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index 1ec8443..94a0e86 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(false){ + if(true){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { From 18634fe56c9f2f7aab99a4dd90d0376fe68af86b Mon Sep 17 00:00:00 2001 From: yssamtu Date: Wed, 12 Oct 2022 18:08:27 +0000 Subject: [PATCH 051/101] ftl bitmap used page --- src/m23-ftl/zns_device.cpp | 46 ++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 15bef92..48c36d6 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -67,6 +67,7 @@ struct logical_block { page_map *old_page_maps; page_map *page_maps_tail; zone_info *data_zone; // block mapping for this logical block (data zone) + uint8_t *bitmap; //TODO: LOCK the access pthread_mutex_t lock; }; @@ -112,13 +113,17 @@ static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages); static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); +static bool read_bitmap(logical_block *block, + uint32_t offset, uint32_t num_pages); +static void write_bitmap(logical_block *block, + uint32_t offset, uint32_t num_pages); static void change_log_zone(zns_info *info); -static void update_map(zns_info *info, unsigned long long page_addr, - unsigned long long physical_addr); +static void update_page_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr); static unsigned request_transfer_size(zns_info *info, uint8_t type); static void free_transfer_size(zns_info *info, uint8_t type, unsigned size); static int read_from_zns(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size, uint8_t type); + void *buffer, uint32_t size, uint8_t type); static int append_to_data_zone(zns_info *info, zone_info *zone, void *buffer, uint32_t size, uint8_t type); static int append_to_log_zone(zns_info *info, unsigned long long page_addr, @@ -245,6 +250,10 @@ int init_ss_zns_device(struct zdev_init_params *params, sizeof(logical_block)); for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(info->num_data_zones * + info->zone_num_pages >> 3UL, + sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } //Start GC @@ -266,6 +275,8 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; + if (!read_bitmap(block, offset, curr_block_read_size / info->page_size)) + return 1; pthread_mutex_lock(&block->lock); if (block->data_zone) { uint32_t curr_read_size = block->data_zone->write_ptr * @@ -330,9 +341,9 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, while (size) { uint32_t index = get_block_index(address / info->page_size, info->zone_num_pages); - logical_block *block = &info->logical_blocks[index]; uint32_t offset = get_data_offset(address / info->page_size, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; uint32_t curr_append_size = 0U; pthread_mutex_lock(&block->lock); // if can write to data zone directly @@ -377,6 +388,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; } + write_bitmap(block, offset, curr_append_size / info->page_size); address += curr_append_size; buffer = (char *)buffer + curr_append_size; size -= curr_append_size; @@ -407,6 +419,7 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); free(blocks[i].data_zone); } + free(blocks[i].bitmap); pthread_mutex_destroy(&blocks[i].lock); } free(blocks); @@ -474,6 +487,25 @@ static inline uint32_t get_data_offset(unsigned long long page_addr, return page_addr % zone_num_pages; } +static bool read_bitmap(logical_block *block, + uint32_t offset, uint32_t num_pages) +{ + while (num_pages) { + if (!(block->bitmap[offset >> 3U] & 1U << (offset & 0x7U))) + return false; + } + return true; +} + +static void write_bitmap(logical_block *block, + uint32_t offset, uint32_t num_pages) +{ + while (num_pages--) { + block->bitmap[offset >> 3U] |= 1U << (offset & 0x7U); + ++offset; + } +} + static void change_log_zone(zns_info *info) { pthread_mutex_lock(&info->zones_lock); @@ -499,8 +531,8 @@ static void change_log_zone(zns_info *info) } } -static void update_map(zns_info *info, unsigned long long page_addr, - unsigned long long physical_addr) +static void update_page_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr) { uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; @@ -684,7 +716,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); increase_write_ptr(info->curr_log_zone, num_curr_append_pages); for (uint32_t i = 0U; i < num_curr_append_pages; ++i) - update_map(info, page_addr++, physical_addr++); + update_page_map(info, page_addr++, physical_addr++); if (change) change_log_zone(info); buffer = (char *)buffer + curr_append_size; From 3c4235e2cb627e48b61de4a6e6c0afb3a69ebdd0 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Wed, 12 Oct 2022 18:08:57 +0000 Subject: [PATCH 052/101] Revert "conflict resolved" This reverts commit 483914feaa652f7770e3ce927ce759d96deac039. --- CMakeLists.txt | 2 +- src/m23-ftl/zns_device.cpp | 4 ---- src/m45-rocksdb/rocks_s2fs.cc | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b38dd9e..8879913 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set(CMAKE_LIBRARY_PATH "/home/$ENV{USER}/local/lib/") set(CMAKE_INSTALL_PREFIX "/home/$ENV{USER}/local/") # Project configuration specific parameters -set(STOSYS_M45 ON) +set(STOSYS_M45 OFF) set(STOSYS_CMAKE_DEBUG OFF) set(STOSYS_ASAN ON) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 43b28a1..15bef92 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -262,10 +262,6 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, uint32_t index = get_block_index(page_addr, info->zone_num_pages); uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - if (block == NULL) - return -1; - uint32_t offset = get_data_offset(logical_page_addr, - info->zone_num_pages); uint32_t curr_block_read_size = (info->zone_num_pages - offset) * info->page_size; if (curr_block_read_size > size) diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index 94a0e86..1ec8443 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(true){ + if(false){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { From 6e32f13722aed7e3cc0a4d98b207def0997acd41 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Wed, 12 Oct 2022 21:18:13 +0000 Subject: [PATCH 053/101] add bitmap --- src/m23-ftl/zns_device.cpp | 126 +++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 941ff4e..fb59f3c 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -119,7 +119,8 @@ static void write_bitmap(logical_block *block, uint32_t offset, uint32_t num_pages); static void change_log_zone(zns_info *info); static void update_page_map(zns_info *info, unsigned long long page_addr, - unsigned long long physical_addr); + unsigned long long physical_addr, + uint32_t num_pages); static unsigned request_transfer_size(zns_info *info, uint8_t type); static void free_transfer_size(zns_info *info, uint8_t type, unsigned size); static int read_from_zns(zns_info *info, unsigned long long physical_addr, @@ -271,16 +272,12 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, uint32_t index = get_block_index(page_addr, info->zone_num_pages); uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; - if (block == NULL) - return -1; - uint32_t offset = get_data_offset(logical_page_addr, - info->zone_num_pages); uint32_t curr_block_read_size = (info->zone_num_pages - offset) * info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; if (!read_bitmap(block, offset, curr_block_read_size / info->page_size)) - return 1; + return -1; pthread_mutex_lock(&block->lock); if (block->data_zone) { uint32_t curr_read_size = block->data_zone->write_ptr * @@ -494,9 +491,10 @@ static inline uint32_t get_data_offset(unsigned long long page_addr, static bool read_bitmap(logical_block *block, uint32_t offset, uint32_t num_pages) { - while (num_pages) { + while (num_pages--) { if (!(block->bitmap[offset >> 3U] & 1U << (offset & 0x7U))) return false; + ++offset; } return true; } @@ -536,66 +534,71 @@ static void change_log_zone(zns_info *info) } static void update_page_map(zns_info *info, unsigned long long page_addr, - unsigned long long physical_addr) + unsigned long long physical_addr, + uint32_t num_pages) { - uint32_t index = get_block_index(page_addr, info->zone_num_pages); - logical_block *block = &info->logical_blocks[index]; - //Lock for updating page map - pthread_mutex_lock(&block->lock); - if (!block->page_maps) { - block->page_maps = (page_map *)calloc(1, sizeof(page_map)); - block->page_maps_tail = block->page_maps; - block->page_maps->page_addr = page_addr; - block->page_maps->physical_addr = physical_addr; - block->page_maps->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); - return; - } - if (block->page_maps->page_addr == page_addr) { - //Update log counter - decrease_num_valid_page(block->page_maps->zone, 1U); - block->page_maps->physical_addr = physical_addr; - block->page_maps->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); - return; - } - if (block->page_maps->page_addr > page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); - tmp->next = block->page_maps; - block->page_maps = tmp; - tmp->page_addr = page_addr; - tmp->physical_addr = physical_addr; - tmp->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); - return; - } - page_map *ptr = block->page_maps; - while (ptr->next) { - if (ptr->next->page_addr == page_addr) { + while (num_pages--) { + uint32_t index = get_block_index(page_addr, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + //Lock for updating page map + pthread_mutex_lock(&block->lock); + if (!block->page_maps) { + block->page_maps = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps_tail = block->page_maps; + block->page_maps->page_addr = page_addr; + block->page_maps->physical_addr = physical_addr; + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + return; + } + if (block->page_maps->page_addr == page_addr) { //Update log counter - decrease_num_valid_page(ptr->next->zone, 1U); - ptr->next->physical_addr = physical_addr; - ptr->next->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); - return; - } else if (ptr->next->page_addr > page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); - tmp->next = ptr->next; - ptr->next = tmp; + decrease_num_valid_page(block->page_maps->zone, 1U); + block->page_maps->physical_addr = physical_addr; + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + return; + } + if (block->page_maps->page_addr > page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = block->page_maps; + block->page_maps = tmp; tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + pthread_mutex_unlock(&block->lock); return; } - ptr = ptr->next; + page_map *ptr = block->page_maps; + while (ptr->next) { + if (ptr->next->page_addr == page_addr) { + //Update log counter + decrease_num_valid_page(ptr->next->zone, 1U); + ptr->next->physical_addr = physical_addr; + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + return; + } else if (ptr->next->page_addr > page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = ptr->next; + ptr->next = tmp; + tmp->page_addr = page_addr; + tmp->physical_addr = physical_addr; + tmp->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + return; + } + ptr = ptr->next; + } + ptr->next = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps_tail = ptr->next; + ptr->next->page_addr = page_addr; + ptr->next->physical_addr = physical_addr; + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + ++page_addr; + ++physical_addr; } - ptr->next = (page_map *)calloc(1, sizeof(page_map)); - block->page_maps_tail = ptr->next; - ptr->next->page_addr = page_addr; - ptr->next->physical_addr = physical_addr; - ptr->next->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); } static unsigned request_transfer_size(zns_info *info, uint8_t type) @@ -719,10 +722,11 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, return errno; increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); increase_write_ptr(info->curr_log_zone, num_curr_append_pages); - for (uint32_t i = 0U; i < num_curr_append_pages; ++i) - update_page_map(info, page_addr++, physical_addr++); + update_page_map(info, page_addr, physical_addr, num_curr_append_pages); if (change) change_log_zone(info); + page_addr += num_curr_append_pages; + physical_addr += num_curr_append_pages; buffer = (char *)buffer + curr_append_size; size -= curr_append_size; } From d3b37015c1f968da6bbc01f32f026d85b2148593 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Thu, 13 Oct 2022 10:36:50 +0000 Subject: [PATCH 054/101] Working patch 1 --- src/m45-rocksdb/S2FileSystem.cc | 135 ++++++++++++++++++++++++-------- src/m45-rocksdb/S2FileSystem.h | 20 ++--- 2 files changed, 108 insertions(+), 47 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index e1421a7..4e5bb0f 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -189,10 +189,20 @@ namespace ROCKSDB_NAMESPACE entityName = path.substr(index + 1, path.size()); } + void Clean_Path(std::string path, std::string &newPath) + { + std::string entity; + Get_EntityName(path, entity); + Get_ParentPath(path, newPath); + newPath.append("/"); + newPath.append(entity); + } + // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool // return value will be 0 if asked child is not present uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren, std::string targetName = "") { + // Check no of children and load it uint64_t children_count = ptr->FileSize; @@ -218,6 +228,7 @@ namespace ROCKSDB_NAMESPACE { strcpy(dir_ptr->Entities[j].EntityName, targetName.c_str()); Store_To_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + free(dir_ptr); return 0; } } @@ -232,9 +243,11 @@ namespace ROCKSDB_NAMESPACE children->push_back(dir_ptr->Entities[i].EntityName); else { + std::cout<<"couter"<Entities[i].EntityName, entityName.c_str())) { - if (targetName == NULL) + std::cout<<"couter1"<Entities[i].InodeNum; free(dir_ptr); @@ -244,12 +257,12 @@ namespace ROCKSDB_NAMESPACE { strcpy(dir_ptr->Entities[i].EntityName, targetName.c_str()); Store_To_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + free(dir_ptr); return 0; } } } } - free(dir_ptr); return 0; } @@ -388,7 +401,7 @@ namespace ROCKSDB_NAMESPACE { uint32_t inode_no = get_FreeInode(FSObj); Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); - + std::cout<<"Dir creation : "<_zns_dev); + free(params.name); if (ret != 0) { std::cout << "Error: " << uri_db_path << " failed to open the device " << device.c_str() << "\n"; @@ -471,6 +485,22 @@ namespace ROCKSDB_NAMESPACE S2FileSystem::~S2FileSystem() { + deinit_ss_zns_device(this->FileSystemObj->zns); + //TODO: Store before Free + free(this->FileSystemObj->rootEntry); + free(this->FileSystemObj->DataBitMap); + + for(int i=0;iFileSystemObj->LookupCache[i], *tmp; + while(head!=NULL) { + std::cout<<"Fixing leak"<chain; + free(tmp->ptr); + free(tmp); + } + } + free(this->FileSystemObj); } // Create a brand new sequentially-readable file with the specified name. @@ -482,14 +512,16 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { + std::string cpath; + Clean_Path(fname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); - std::cout << "Seq File to access : " << fname << " " << isPresent << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + std::cout << "Seq File to access : " << cpath << " " << isPresent << std::endl; if (isPresent) return IOStatus::IOError(__FUNCTION__); result->reset(); - result->reset(new MYFS_SequentialFile(fname, this->FileSystemObj)); + result->reset(new MYFS_SequentialFile(cpath, this->FileSystemObj)); return IOStatus::OK(); } @@ -508,14 +540,16 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { + std::string cpath; + Clean_Path(fname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); - std::cout << "Random file to access : " << fname << " " << isPresent << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + std::cout << "Random file to access : " << cpath << " " << isPresent << std::endl; if (isPresent) return IOStatus::IOError(__FUNCTION__); result->reset(); - result->reset(new MYFS_RandomAccessFile(fname, this->FileSystemObj)); + result->reset(new MYFS_RandomAccessFile(cpath, this->FileSystemObj)); return IOStatus::OK(); } @@ -534,16 +568,18 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg) { + std::string cpath; + Clean_Path(fname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); - std::cout << "fname : " << fname << " " << isPresent << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + std::cout << "fname : " << cpath << " " << isPresent << std::endl; if (isPresent) - MYFS_CreateFile(this->FileSystemObj, fname); + MYFS_CreateFile(this->FileSystemObj, cpath); else ptr->FileSize = 0; result->reset(); - result->reset(new MYFS_WritableFile(fname, this->FileSystemObj)); + result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); return IOStatus::OK(); } @@ -577,7 +613,10 @@ namespace ROCKSDB_NAMESPACE S2FileSystem::NewDirectory(const std::string &name, const IOOptions &io_opts, std::unique_ptr *result, IODebugContext *dbg) { + std::cout << "New Directory : " << name << std::endl; + result->reset(); + result->reset(new MYFS_Directory(this->FileSystemObj)); return IOStatus::OK(); } @@ -594,10 +633,12 @@ namespace ROCKSDB_NAMESPACE // Create the specified directory. Returns error if directory exists. IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + std::string cpath; + Clean_Path(dirname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, dirname, &ptr); + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); if (isPresent) - isPresent = MYFS_CreateDir(this->FileSystemObj, dirname); + isPresent = MYFS_CreateDir(this->FileSystemObj, cpath); else return IOStatus::IOError(__FUNCTION__); @@ -608,9 +649,11 @@ namespace ROCKSDB_NAMESPACE // Creating. IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + std::string cpath; + Clean_Path(dirname, cpath); Inode *ptr; - std::cout << "If dir missing : " << dirname << std::endl; - std::string dir = dirname.substr(0, dirname.size() - 1); + std::cout << "If dir missing : " << cpath << std::endl; + std::string dir = cpath.substr(0, cpath.size() - 1); int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); std::cout << std::endl << std::endl; @@ -624,8 +667,10 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { + std::string cpath; + Clean_Path(fname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, fname, &ptr); + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); if (isPresent) return IOStatus::IOError(__FUNCTION__); else @@ -717,6 +762,9 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::RenameFile(const std::string &src, const std::string &target, const IOOptions &options, IODebugContext *dbg) { + std::string cpath_target, cpath_src; + Clean_Path(src, cpath_src); + Clean_Path(target, cpath_target); Inode *targetptr, *sourceptr; // MYFS_DeletePath(this->FileSystemObj, target); // FIXME: Logic for rename @@ -724,25 +772,25 @@ namespace ROCKSDB_NAMESPACE // Change in parent // verify if target exists - std::cout << "Rename file" << src << " " << target << std::endl; - int isPresent = Get_Path_Inode(this->FileSystemObj, target, &targetptr); + std::cout << "Rename file" << cpath_src << " " << cpath_target << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); if (isPresent) { // if it is not present // rename the inode std::string entityName; - Get_EntityName(target, entityName); - Get_Path_Inode(this->FileSystemObj, src, &sourceptr); + Get_EntityName(cpath_target, entityName); + Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); strcpy(sourceptr->EntityName, entityName.c_str()); - LookupMap_Delete(this->FileSystemObj, src); + LookupMap_Delete(this->FileSystemObj, cpath_src); - LookupMap_Insert(this->FileSystemObj, target, sourceptr); + LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); // rename the entity in the parent std::string srcEntityName; - Get_EntityName(src, srcEntityName); + Get_EntityName(cpath_src, srcEntityName); std::string parentPath; - Get_ParentPath(target, parentPath); + Get_ParentPath(cpath_target, parentPath); int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, srcEntityName); if (parentUpdated) return IOStatus::IOError(__FUNCTION__); @@ -769,13 +817,18 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, IODebugContext *dbg) { + std::string cpath; + Get_ParentPath(dir, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); + + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + std::cout<<"Get children : "<FileSystemObj, ptr, "", result, true); - if (!err) + if (err) return IOStatus::IOError(__FUNCTION__); + std::cout<<"Result size : "<size()<FileSystemObj, fname, &ptr); + std::string cpath; + Clean_Path(fname, cpath); + std::cout << "File Exists : " << cpath << std::endl; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); if (isPresent) return IOStatus::NotFound(); return IOStatus::OK(); @@ -809,7 +864,7 @@ namespace ROCKSDB_NAMESPACE int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) { - uint32_t curr = offset / 4096, end = size / 4096; + uint32_t curr = offset / 4096, end = (offset+size) / 4096; uint64_t if_dirty_addr; uint64_t *data_block_lba_ptr, next_indirect_block_addr; uint32_t no_of_data_block_ptrs; @@ -861,17 +916,22 @@ namespace ROCKSDB_NAMESPACE { ptr->Indirect_ptr_lbas = next_indirect_block_addr; Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + } else { iptr->Indirect_ptr_lbas = next_indirect_block_addr; Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + free(iptr); } - iptr = {0}; + iptr = (Indirect_ptr *)calloc(1, 4096); iptr->Current_addr = next_indirect_block_addr; } else { + if (iptr == NULL) + iptr = (Indirect_ptr *)calloc(1, 4096); + Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); } next_indirect_block_addr = iptr->Indirect_ptr_lbas; @@ -932,7 +992,7 @@ namespace ROCKSDB_NAMESPACE { // Check with file size int sizeW = this->PRead(this->curr_read_offset, size, data); - this->curr_read_offset += size; + this->curr_read_offset += sizeW; return sizeW; } @@ -965,6 +1025,7 @@ namespace ROCKSDB_NAMESPACE if (smargin) Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); + std::cout<<"memcpy : "<FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); @@ -977,7 +1038,7 @@ namespace ROCKSDB_NAMESPACE int MYFS_File::Append(uint64_t size, char *data) { - return this->PAppend(ptr->FileSize, size, data); + return this->PAppend(this->ptr->FileSize, size, data); } uint64_t MYFS_File::GetFileSize() @@ -1055,14 +1116,20 @@ namespace ROCKSDB_NAMESPACE IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) { + char *block = (char *)data.data(); uint64_t size = data.size(); + std::cout<<"Appending "<fp->Append(size, block); std::cout<fp;} + virtual ~MYFS_SequentialFile(){std::cout<<"Close the file : "<fp;} virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg)override; @@ -156,7 +156,7 @@ namespace ROCKSDB_NAMESPACE public: MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); - virtual ~MYFS_RandomAccessFile(){delete this->fp;} + virtual ~MYFS_RandomAccessFile(){std::cout<<"Random Close the file : "<fp;} virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) const override; /* @@ -218,21 +218,15 @@ namespace ROCKSDB_NAMESPACE class MYFS_Directory : public FSDirectory { - /* + private: + MYFS *fp; public: + MYFS_Directory(MYFS *FSObj){} + ~MYFS_Directory(){} virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { + std::cout<<"Sample"< Date: Thu, 13 Oct 2022 13:00:58 +0000 Subject: [PATCH 055/101] Working patch with 1 leak --- src/m45-rocksdb/S2FileSystem.cc | 6 +++--- src/m45-rocksdb/rocks_s2fs.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 4e5bb0f..1ea54a9 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -110,13 +110,13 @@ namespace ROCKSDB_NAMESPACE { // Check the size if quantization of LBA int err = zns_udevice_read(FSObj->zns, addr, buffer, size); - return err; + return 0; } int Store_To_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) { int err = zns_udevice_write(FSObj->zns, addr, buffer, size); - return err; + return 0; } uint32_t get_FreeInode(MYFS *FSObj) @@ -931,7 +931,7 @@ namespace ROCKSDB_NAMESPACE { if (iptr == NULL) iptr = (Indirect_ptr *)calloc(1, 4096); - + Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); } next_indirect_block_addr = iptr->Indirect_ptr_lbas; diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index 1ec8443..94a0e86 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(false){ + if(true){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { From 1d37cb5ff90b2c6bd99c8d18b6e52756e0ae4666 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Thu, 13 Oct 2022 15:14:08 +0000 Subject: [PATCH 056/101] WP1 with leaks cleaned from couts --- src/m45-rocksdb/S2FileSystem.cc | 33 ++------------------------------- src/m45-rocksdb/S2FileSystem.h | 9 ++++----- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 1ea54a9..8b5e972 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -243,10 +243,8 @@ namespace ROCKSDB_NAMESPACE children->push_back(dir_ptr->Entities[i].EntityName); else { - std::cout<<"couter"<Entities[i].EntityName, entityName.c_str())) { - std::cout<<"couter1"<Entities[i].InodeNum; @@ -308,7 +306,6 @@ namespace ROCKSDB_NAMESPACE // Put it in lookup Map LookupMap_Insert(FSObj, path, *ptr); - std::cout << entityName << std::endl; return 0; } @@ -375,7 +372,6 @@ namespace ROCKSDB_NAMESPACE */ int MYFS_CreateFile(MYFS *FSObj, std::string path) { - std::cout << "File creation : " << path << std::endl; uint32_t inode_no = get_FreeInode(FSObj); Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); // Fill the ptr @@ -401,7 +397,6 @@ namespace ROCKSDB_NAMESPACE { uint32_t inode_no = get_FreeInode(FSObj); Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); - std::cout<<"Dir creation : "<FileSystemObj->LookupCache[i], *tmp; while(head!=NULL) { - std::cout<<"Fixing leak"<chain; free(tmp->ptr); @@ -516,7 +510,6 @@ namespace ROCKSDB_NAMESPACE Clean_Path(fname, cpath); Inode *ptr; int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - std::cout << "Seq File to access : " << cpath << " " << isPresent << std::endl; if (isPresent) return IOStatus::IOError(__FUNCTION__); @@ -544,7 +537,6 @@ namespace ROCKSDB_NAMESPACE Clean_Path(fname, cpath); Inode *ptr; int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - std::cout << "Random file to access : " << cpath << " " << isPresent << std::endl; if (isPresent) return IOStatus::IOError(__FUNCTION__); @@ -572,7 +564,6 @@ namespace ROCKSDB_NAMESPACE Clean_Path(fname, cpath); Inode *ptr; int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - std::cout << "fname : " << cpath << " " << isPresent << std::endl; if (isPresent) MYFS_CreateFile(this->FileSystemObj, cpath); else @@ -586,14 +577,12 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, IODebugContext *) { - std::cout << "Writable file" << std::endl; return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::NewRandomRWFile(const std::string &, const FileOptions &, std::unique_ptr *, IODebugContext *) { - std::cout << "RWWritable file" << std::endl; return IOStatus::IOError(__FUNCTION__); } @@ -614,7 +603,6 @@ namespace ROCKSDB_NAMESPACE IODebugContext *dbg) { - std::cout << "New Directory : " << name << std::endl; result->reset(); result->reset(new MYFS_Directory(this->FileSystemObj)); return IOStatus::OK(); @@ -652,11 +640,8 @@ namespace ROCKSDB_NAMESPACE std::string cpath; Clean_Path(dirname, cpath); Inode *ptr; - std::cout << "If dir missing : " << cpath << std::endl; std::string dir = cpath.substr(0, cpath.size() - 1); int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); - std::cout << std::endl - << std::endl; if (isPresent) isPresent = MYFS_CreateDir(this->FileSystemObj, dir); if (isPresent) @@ -667,6 +652,7 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { + std::string cpath; Clean_Path(fname, cpath); Inode *ptr; @@ -692,8 +678,7 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg) { - *output_path = db_path; - std::cout << "Get Abs path" << std::endl; + //*output_path = db_path; return IOStatus::OK(); } @@ -706,8 +691,6 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, IODebugContext *dbg) { - std::cout << "Logger \n" - << std::endl; return IOStatus::IOError(__FUNCTION__); } @@ -772,7 +755,6 @@ namespace ROCKSDB_NAMESPACE // Change in parent // verify if target exists - std::cout << "Rename file" << cpath_src << " " << cpath_target << std::endl; int isPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); if (isPresent) { @@ -822,13 +804,11 @@ namespace ROCKSDB_NAMESPACE Inode *ptr; int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - std::cout<<"Get children : "<FileSystemObj, ptr, "", result, true); if (err) return IOStatus::IOError(__FUNCTION__); - std::cout<<"Result size : "<size()<FileSystemObj, cpath, &ptr); if (isPresent) return IOStatus::NotFound(); @@ -966,7 +945,6 @@ namespace ROCKSDB_NAMESPACE int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) { - std::cout<<"Read on : "<ptr->EntityName<<" "<ptr->FileSize<<" "<FileSize < offset + size) { if(offset >= ptr->FileSize) return 0; @@ -1013,7 +991,6 @@ namespace ROCKSDB_NAMESPACE int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) { - std::cout<<"Append on : "<ptr->EntityName<<" "< addresses_to_read; int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); if (err) @@ -1025,7 +1002,6 @@ namespace ROCKSDB_NAMESPACE if (smargin) Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); - std::cout<<"memcpy : "<FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); @@ -1061,9 +1037,7 @@ namespace ROCKSDB_NAMESPACE { int sizeW = this->fp->Read(n, scratch); - std::cout<<"Read done with : "<size()<fp->PRead(offset, n, scratch); *result = Slice(scratch, sizeW); return IOStatus::OK(); @@ -1119,9 +1092,7 @@ namespace ROCKSDB_NAMESPACE char *block = (char *)data.data(); uint64_t size = data.size(); - std::cout<<"Appending "<fp->Append(size, block); - std::cout<fp;} + virtual ~MYFS_SequentialFile(){delete this->fp;} virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg)override; @@ -156,7 +156,7 @@ namespace ROCKSDB_NAMESPACE public: MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); - virtual ~MYFS_RandomAccessFile(){std::cout<<"Random Close the file : "<fp;} + virtual ~MYFS_RandomAccessFile(){delete this->fp;} virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) const override; /* @@ -222,9 +222,8 @@ namespace ROCKSDB_NAMESPACE MYFS *fp; public: MYFS_Directory(MYFS *FSObj){} - ~MYFS_Directory(){} + virtual ~MYFS_Directory(){} virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { - std::cout<<"Sample"< Date: Thu, 13 Oct 2022 16:06:01 +0000 Subject: [PATCH 057/101] fix leak bugs --- src/m45-rocksdb/S2FileSystem.cc | 7 ++++--- src/m45-rocksdb/S2FileSystem.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 8b5e972..e55f8de 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -44,7 +44,7 @@ namespace ROCKSDB_NAMESPACE int index = LookupMap_HashFunction(id); mapEntries *map = (mapEntries *)calloc(1, sizeof(mapEntries)); - map->id = id; + strcpy(map->id,id.c_str()); map->ptr = ptr; map->chain = NULL; @@ -70,7 +70,7 @@ namespace ROCKSDB_NAMESPACE while (head != NULL) { - if (head->id == id) + if (!strcmp(head->id,id.c_str())) { if (tmp == NULL) FSObj->LookupCache[index] = head->chain; @@ -94,7 +94,7 @@ namespace ROCKSDB_NAMESPACE while (head != NULL) { - if (head->id == id) + if (!strcmp(head->id,id.c_str())) break; head = head->chain; } @@ -685,6 +685,7 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { // MYFS_DeletePath(this->FileSystemObj, fname); + std::cout<<"Delete file called"< Date: Thu, 13 Oct 2022 16:08:18 +0000 Subject: [PATCH 058/101] CmakeLists --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8879913..b38dd9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set(CMAKE_LIBRARY_PATH "/home/$ENV{USER}/local/lib/") set(CMAKE_INSTALL_PREFIX "/home/$ENV{USER}/local/") # Project configuration specific parameters -set(STOSYS_M45 OFF) +set(STOSYS_M45 ON) set(STOSYS_CMAKE_DEBUG OFF) set(STOSYS_ASAN ON) From 0f29775cb9065e8b6cdb67f84bc4b1d6db3e3eaa Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Thu, 13 Oct 2022 18:17:53 +0000 Subject: [PATCH 059/101] Slow working patch --- src/m45-rocksdb/S2FileSystem.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index e55f8de..fc9c0ef 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -862,7 +862,12 @@ namespace ROCKSDB_NAMESPACE { curr -= 480; int nth_indirect = curr / 510; + //What if ptr->Indirect_ptr_lba iptr = (Indirect_ptr *)calloc(1, 4096); + if(ptr->Indirect_ptr_lbas == 0) { + ptr->Indirect_ptr_lbas = get_FreeDataBlock(FSObj); + } + Load_From_NVM(FSObj, ptr->Indirect_ptr_lbas, iptr, 4096); for (int i = 0; i < nth_indirect; i++) Load_From_NVM(FSObj, iptr->Indirect_ptr_lbas, iptr, 4096); @@ -881,7 +886,7 @@ namespace ROCKSDB_NAMESPACE if (!addr) { addr = get_FreeDataBlock(FSObj); - *data_block_lba_ptr = addr; + *(data_block_lba_ptr+curr) = addr; } addresses->push_back(addr); curr++; From 7b45f8a54747cfa8e7793b636378b1d7455cabab Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Thu, 13 Oct 2022 20:59:21 +0000 Subject: [PATCH 060/101] Patch with write buffering --- src/m45-rocksdb/S2FileSystem.cc | 34 +++++++++++++++++++++++++++++++++ src/m45-rocksdb/S2FileSystem.h | 8 ++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index fc9c0ef..66c33aa 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -1083,6 +1083,8 @@ namespace ROCKSDB_NAMESPACE MYFS_WritableFile::MYFS_WritableFile(std::string fname, MYFS *FSObj) { this->fp = new MYFS_File(fname, FSObj); + this->cache = false; + this->cacheSize = 0; } IOStatus MYFS_WritableFile::Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) @@ -1093,11 +1095,43 @@ namespace ROCKSDB_NAMESPACE return IOStatus::OK(); } + IOStatus MYFS_WritableFile::ClearCache() { + if(!this->cache) + return IOStatus::OK(); + this->cache = false; + int err = this->fp->Append(this->cacheSize, this->cacheData); + if (err) + return IOStatus::IOError(__FUNCTION__); + free(this->cacheData); + this->cacheSize = 0; + return IOStatus::OK(); + } + IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) { char *block = (char *)data.data(); uint64_t size = data.size(); + if(this->cache) { + //Append to cache + char *tmp = (char *)calloc(1, this->cacheSize+size); + memcpy(tmp, this->cacheData, this->cacheSize); + memcpy(tmp+this->cacheSize, block, size); + free(this->cacheData); + this->cacheData = tmp; + this->cacheSize += size; + //If size > 4096 clear cache + if(this->cacheSize >= 4096) + this->ClearCache(); + return IOStatus::OK(); + } else if(size < 4096) { + //Append to cache + this->cache = true; + this->cacheData = (char *)calloc(1, size); + memcpy(this->cacheData, block, size); + this->cacheSize = size; + return IOStatus::OK(); + } int err = this->fp->Append(size, block); if (err) return IOStatus::IOError(__FUNCTION__); diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index 728f54b..ae2c9bc 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -109,6 +109,7 @@ namespace ROCKSDB_NAMESPACE struct Inode *ptr; MYFS *FSObj; uint64_t curr_read_offset; + void *current_ptr; public: MYFS_File(std::string filePath, MYFS *FSObj); @@ -177,10 +178,13 @@ namespace ROCKSDB_NAMESPACE { private: MYFS_File *fp; - + bool cache; + uint64_t cacheSize; + char *cacheData; + virtual IOStatus ClearCache(); public: MYFS_WritableFile(std::string fname, MYFS *FSObj); - virtual ~MYFS_WritableFile(){delete this->fp;} + virtual ~MYFS_WritableFile(){this->ClearCache();delete this->fp;} virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) override; virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) {return IOStatus::OK();}; From 9ae163c5d6b639fc38d1cbe3d8201ed6929516c6 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Thu, 13 Oct 2022 22:33:32 +0000 Subject: [PATCH 061/101] Larger buffering --- src/m45-rocksdb/S2FileSystem.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 66c33aa..4942369 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -1121,10 +1121,10 @@ namespace ROCKSDB_NAMESPACE this->cacheData = tmp; this->cacheSize += size; //If size > 4096 clear cache - if(this->cacheSize >= 4096) + if(this->cacheSize >= 4096*200) this->ClearCache(); return IOStatus::OK(); - } else if(size < 4096) { + } else if(size < 4096*200) { //Append to cache this->cache = true; this->cacheData = (char *)calloc(1, size); From 0d1f99a220e99132ef9bf22f38071efcccb45644 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 14 Oct 2022 08:32:24 +0000 Subject: [PATCH 062/101] clean version of m45 --- CMakeLists.txt | 2 +- src/m45-rocksdb/S2FileSystem.cc | 1006 ++----------------------------- src/m45-rocksdb/S2FileSystem.h | 215 +------ src/m45-rocksdb/rocks_s2fs.cc | 3 +- 4 files changed, 61 insertions(+), 1165 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b38dd9e..10a75d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,4 +146,4 @@ if(STOSYS_CMAKE_DEBUG) foreach (_variableName ${_variableNames}) message(STATUS "${_variableName}=${${_variableName}}") endforeach() -endif() +endif() \ No newline at end of file diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 4942369..623aa5d 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -28,397 +28,8 @@ SOFTWARE. #include #include -namespace ROCKSDB_NAMESPACE -{ - int LookupMap_HashFunction(std::string id) - { - unsigned hashindex; - char *ptr = const_cast(id.c_str()); - for (hashindex = 0; *ptr != '\0'; ptr++) - hashindex = *ptr + STRINGENCODE * hashindex; - return hashindex % LOOKUP_MAP_SIZE; - } - - int LookupMap_Insert(MYFS *FSObj, std::string id, Inode *ptr) - { - int index = LookupMap_HashFunction(id); - - mapEntries *map = (mapEntries *)calloc(1, sizeof(mapEntries)); - strcpy(map->id,id.c_str()); - map->ptr = ptr; - map->chain = NULL; - - if (FSObj->LookupCache[index] == NULL) - FSObj->LookupCache[index] = map; - else - { - struct mapEntries *head; - head = FSObj->LookupCache[index]; - while (head->chain != NULL) - head = head->chain; - head->chain = map; - } - - return 0; - } - - int LookupMap_Delete(MYFS *FSObj, std::string id) - { - int index = LookupMap_HashFunction(id); - struct mapEntries *head, *tmp = NULL; - head = FSObj->LookupCache[index]; - - while (head != NULL) - { - if (!strcmp(head->id,id.c_str())) - { - if (tmp == NULL) - FSObj->LookupCache[index] = head->chain; - else - tmp->chain = head->chain; - free(head); - break; - } - tmp = head; - head = head->chain; - } - - return 0; - } - - int LookupMap_Lookup(MYFS *FSObj, std::string id, Inode **ptr) - { - int index = LookupMap_HashFunction(id); - struct mapEntries *head; - head = FSObj->LookupCache[index]; - - while (head != NULL) - { - if (!strcmp(head->id,id.c_str())) - break; - head = head->chain; - } - - if (head == NULL) - return -1; - - *ptr = head->ptr; - return 0; - } - - int Load_From_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) - { - // Check the size if quantization of LBA - int err = zns_udevice_read(FSObj->zns, addr, buffer, size); - return 0; - } - - int Store_To_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) - { - int err = zns_udevice_write(FSObj->zns, addr, buffer, size); - return 0; - } - - uint32_t get_FreeInode(MYFS *FSObj) - { - uint32_t ptr = (FSObj->InodePtr + 1) % MAX_INODE_COUNT; - while (ptr != FSObj->InodePtr) - { - if (!FSObj->InodeBitMap[ptr]) - { - FSObj->InodePtr = ptr; - return ptr; - } - ptr = (ptr + 1) % MAX_INODE_COUNT; - } - return 0; - } - - uint64_t get_FreeDataBlock(MYFS *FSObj) - { - uint64_t ptr = (FSObj->DataBlockPtr + 1) % FSObj->DataBlockCount; - while (ptr != FSObj->DataBlockPtr) - { - if (!FSObj->DataBitMap[ptr]) - { - FSObj->DataBlockPtr = ptr; - return (ptr + DATA_BLOCKS_OFFSET) * FSObj->LogicalBlockSize; - } - ptr = (ptr + 1) % FSObj->DataBlockCount; - } - return 0; - } - - void free_DataBlock(MYFS *FSObj, uint64_t addr) - { - int index = (addr / FSObj->LogicalBlockSize) - DATA_BLOCKS_OFFSET; - FSObj->DataBitMap[index] = false; - } - - // Trim till /../path in /../path/name - void Get_ParentPath(std::string path, std::string &parent) - { - int index; - for (int i = path.size() - 1; i >= 0; i--) - { - if (path[i] == '/') - { - index = i; - break; - } - } - // Trim if additional slash is present - if (path[index - 1] == '/') - index--; - - parent = path.substr(0, index); - } - - // Trim /../path/name to name - void Get_EntityName(std::string path, std::string &entityName) - { - int index; - for (int i = path.size() - 1; i >= 0; i--) - { - if (path[i] == '/') - { - index = i; - break; - } - } - entityName = path.substr(index + 1, path.size()); - } - - void Clean_Path(std::string path, std::string &newPath) - { - std::string entity; - Get_EntityName(path, entity); - Get_ParentPath(path, newPath); - newPath.append("/"); - newPath.append(entity); - } - - // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool - // return value will be 0 if asked child is not present - uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren, std::string targetName = "") - { - - // Check no of children and load it - uint64_t children_count = ptr->FileSize; - - MYFS_Dir *dir_ptr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); - for (int i = 0; i < children_count / 16; i++) - { - Load_From_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); - for (int j = 0; j < 16; j++) - { - if (loadChildren) - children->push_back(dir_ptr->Entities[j].EntityName); - else - { - if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) - { - if (targetName == "") - { - uint32_t ret = dir_ptr->Entities[j].InodeNum; - free(dir_ptr); - return ret; - } - else - { - strcpy(dir_ptr->Entities[j].EntityName, targetName.c_str()); - Store_To_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); - free(dir_ptr); - return 0; - } - } - } - } - } - - Load_From_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); - for (int i = 0; i < children_count % 16; i++) - { - if (loadChildren) - children->push_back(dir_ptr->Entities[i].EntityName); - else - { - if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) - { - if (targetName == "") - { - uint32_t ret = dir_ptr->Entities[i].InodeNum; - free(dir_ptr); - return ret; - } - else - { - strcpy(dir_ptr->Entities[i].EntityName, targetName.c_str()); - Store_To_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); - free(dir_ptr); - return 0; - } - } - } - } - free(dir_ptr); - return 0; - } - - // A recursive call to load inode of the given path to lookupmap - // Stores the inode ptr as well, returns 0 in success - int Get_Path_Inode(MYFS *FSObj, std::string path, Inode **ptr) - { - if (path == "/tmp") - { - *ptr = FSObj->rootEntry; - return 0; - } - - // Check if path in lookupMap cache - int isPresent = LookupMap_Lookup(FSObj, path, ptr); - if (!isPresent) - return 0; - - // if not : Get_Path_Inode for parent dir - std::string parent; - Inode *parentInode; - Get_ParentPath(path, parent); - isPresent = Get_Path_Inode(FSObj, parent, &parentInode); - if (isPresent) - return -1; - // Read parent dir and get asked inode number - if (parentInode->FileSize == 0) - return -1; - // Get Entity to search for - std::string entityName; - Get_EntityName(path, entityName); - uint32_t index = Load_Children(FSObj, parentInode, entityName, NULL, false); - if (!index) - return -1; - - // Load the children index inode from disk and store in lookupMap; - uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; - ptr = (Inode **)calloc(1, sizeof(Inode)); - isPresent = Load_From_NVM(FSObj, address, ptr, (uint64_t)INODE_SIZE); - if (!isPresent) - return -1; - - // Put it in lookup Map - LookupMap_Insert(FSObj, path, *ptr); - return 0; - } - - int Rename_Child_In_Parent(MYFS *FSObj, std::string Ppath, std::string targetName, std::string srcName) - { - // FIXME: Logic for rename - Inode *parentInode; - int isPresent = Get_Path_Inode(FSObj, Ppath, &parentInode); - uint32_t rename = Load_Children(FSObj, parentInode, srcName, NULL, false, targetName); - return rename; - } - - int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) - { - // FIXME: Logic for deletion - Inode *ptr; - int isPresent = Get_Path_Inode(FSObj, Ppath, &ptr); - if (isPresent) - return -1; - - MYFS_DirData dirDataptr; - strcpy(dirDataptr.EntityName, childName.c_str()); - dirDataptr.InodeNum = childInode; - - MYFS_Dir *dirPtr; - dirPtr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); - int index = (++ptr->FileSize) / 16; - uint64_t addr = ptr->Direct_data_lbas[index]; - - if (!addr) - { - addr = get_FreeDataBlock(FSObj); - ptr->Direct_data_lbas[index] = addr; - } - else - { - index = Load_From_NVM(FSObj, addr, dirPtr, 4096); - if (index) - return -1; - } - - index = ptr->FileSize % 16; - dirPtr->Entities[index - 1] = dirDataptr; - Store_To_NVM(FSObj, addr, dirPtr, 4096); - free(dirPtr); - - return 0; - } - /* - void MYFS_DeletePath(MYFS *FSObj, std::string path) - { - Inode *ptr; - int isPresent = Get_Path_Inode(FSObj, path, &ptr); - if (isPresent) - return; - // TODO: Handle logic if dir - // Free data block of inode as well! - - // Update Parent - std::string ppath; - Get_ParentPath(path, ppath); - // Delete from lookup map - } - */ - int MYFS_CreateFile(MYFS *FSObj, std::string path) - { - uint32_t inode_no = get_FreeInode(FSObj); - Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); - // Fill the ptr - std::string entityName; - Get_EntityName(path, entityName); - strcpy(ptr->EntityName, entityName.c_str()); - ptr->Inode_no = inode_no; - - // Update parent - std::string parent; - Get_ParentPath(path, parent); - int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); - if (parentUpdated) - return -1; - - // Load to lookupmap - LookupMap_Insert(FSObj, path, ptr); - - return 0; - } - - int MYFS_CreateDir(MYFS *FSObj, std::string path) - { - uint32_t inode_no = get_FreeInode(FSObj); - Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); - // Fill the ptr - std::string entityName; - Get_EntityName(path, entityName); - strcpy(ptr->EntityName, entityName.c_str()); - ptr->IsDir = true; - ptr->Inode_no = inode_no; - - // Update parent - std::string parent; - Get_ParentPath(path, parent); - int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); - if (parentUpdated) - return -1; - - // Load to lookupmap - LookupMap_Insert(FSObj, path, ptr); - - return 0; - } - - S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) - { +namespace ROCKSDB_NAMESPACE { + S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) { FileSystem::Default(); std::string sdelimiter = ":"; std::string edelimiter = "://"; @@ -426,75 +37,25 @@ namespace ROCKSDB_NAMESPACE struct zdev_init_params params; std::string device = uri_db_path.substr(uri_db_path.find(sdelimiter) + sdelimiter.size(), uri_db_path.find(edelimiter) - - (uri_db_path.find(sdelimiter) + sdelimiter.size())); - // make sure to setup these parameters properly and check the forced reset flag for M5 + (uri_db_path.find(sdelimiter) + sdelimiter.size())); + //make sure to setup these parameters properly and check the forced reset flag for M5 params.name = strdup(device.c_str()); params.log_zones = 3; params.gc_wmark = 1; params.force_reset = true; int ret = init_ss_zns_device(¶ms, &this->_zns_dev); - free(params.name); - if (ret != 0) - { + if(ret != 0){ std::cout << "Error: " << uri_db_path << " failed to open the device " << device.c_str() << "\n"; std::cout << "Error: ret " << ret << "\n"; } - assert(ret == 0); + assert (ret == 0); assert(this->_zns_dev->lba_size_bytes != 0); assert(this->_zns_dev->capacity_bytes != 0); ss_dprintf(DBG_FS_1, "device %s is opened and initialized, reported LBA size is %u and capacity %lu \n", device.c_str(), this->_zns_dev->lba_size_bytes, this->_zns_dev->capacity_bytes); - - // INIT File System - // TODO: In case of persistency; Read following data from Super block - // Init Bitmaps from disk - if (debug) - std::cout << "Init MYFS" << std::endl; - this->FileSystemObj = (MYFS *)calloc(1, sizeof(MYFS)); - this->FileSystemObj->zns = this->_zns_dev; - this->FileSystemObj->FileSystemCapacity = this->_zns_dev->capacity_bytes; - this->FileSystemObj->LogicalBlockSize = this->_zns_dev->lba_size_bytes; - // We reserve a single block as super block and MAX_INODE_COUNT as - this->FileSystemObj->DataBlockCount = (this->FileSystemObj->FileSystemCapacity / this->FileSystemObj->LogicalBlockSize - (MAX_INODE_COUNT + 1)); - if (debug) - std::cout << "File System params : " << this->FileSystemObj->FileSystemCapacity << " " << this->FileSystemObj->LogicalBlockSize << " " << this->FileSystemObj->DataBlockCount << std::endl; - - // Init Data blocks bitmap - // this->FileSystemObj->LookupCache = (mapEntries *) calloc(LOOKUP_MAP_SIZE, sizeof(mapEntries)); - this->FileSystemObj->DataBitMap = (bool *)calloc(this->FileSystemObj->DataBlockCount, sizeof(bool)); - - // Init root inode - // TODO: In case of persistency check if already present in disk - // FIXME: Get root dir name dynamically - this->FileSystemObj->DataBlockPtr = 0; // Reserved for Root Node - this->FileSystemObj->InodePtr = 0; - this->FileSystemObj->InodeBitMap[0] = true; - *(this->FileSystemObj->DataBitMap) = true; - this->FileSystemObj->rootEntry = (Inode *)calloc(1, sizeof(Inode)); - strcpy(this->FileSystemObj->rootEntry->EntityName, "tmp"); - this->FileSystemObj->rootEntry->IsDir = true; - this->FileSystemObj->rootEntry->Inode_no = 0; - this->FileSystemObj->rootEntry->FileSize = 0; - this->FileSystemObj->rootEntry->Direct_data_lbas[0] = DATA_BLOCKS_OFFSET * this->FileSystemObj->LogicalBlockSize; } - S2FileSystem::~S2FileSystem() - { - deinit_ss_zns_device(this->FileSystemObj->zns); - //TODO: Store before Free - free(this->FileSystemObj->rootEntry); - free(this->FileSystemObj->DataBitMap); - - for(int i=0;iFileSystemObj->LookupCache[i], *tmp; - while(head!=NULL) { - tmp = head; - head = head->chain; - free(tmp->ptr); - free(tmp); - } - } - free(this->FileSystemObj); + S2FileSystem::~S2FileSystem() { } // Create a brand new sequentially-readable file with the specified name. @@ -504,22 +65,11 @@ namespace ROCKSDB_NAMESPACE // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) - { - std::string cpath; - Clean_Path(fname, cpath); - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - - result->reset(); - result->reset(new MYFS_SequentialFile(cpath, this->FileSystemObj)); - return IOStatus::OK(); + std::unique_ptr *result, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) - { + IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } @@ -531,22 +81,11 @@ namespace ROCKSDB_NAMESPACE // // The returned file may be concurrently accessed by multiple threads. IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) - { - std::string cpath; - Clean_Path(fname, cpath); - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - - result->reset(); - result->reset(new MYFS_RandomAccessFile(cpath, this->FileSystemObj)); - return IOStatus::OK(); + std::unique_ptr *result, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } - const char *S2FileSystem::Name() const - { + const char *S2FileSystem::Name() const { return "S2FileSytem"; } @@ -558,36 +97,21 @@ namespace ROCKSDB_NAMESPACE // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) - { - std::string cpath; - Clean_Path(fname, cpath); - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - MYFS_CreateFile(this->FileSystemObj, cpath); - else - ptr->FileSize = 0; - - result->reset(); - result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); - return IOStatus::OK(); + std::unique_ptr *result, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) - { + IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::NewRandomRWFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) - { + IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) - { + IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) { return IOStatus::IOError(__FUNCTION__); } @@ -600,112 +124,66 @@ namespace ROCKSDB_NAMESPACE // returns non-OK. IOStatus S2FileSystem::NewDirectory(const std::string &name, const IOOptions &io_opts, std::unique_ptr *result, - IODebugContext *dbg) - { - - result->reset(); - result->reset(new MYFS_Directory(this->FileSystemObj)); - return IOStatus::OK(); + IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) - { + IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) - { + IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } // Create the specified directory. Returns error if directory exists. - IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) - { - std::string cpath; - Clean_Path(dirname, cpath); - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - isPresent = MYFS_CreateDir(this->FileSystemObj, cpath); - else - return IOStatus::IOError(__FUNCTION__); - - return IOStatus::OK(); + IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } // Creates directory if missing. Return Ok if it exists, or successful in // Creating. - IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) - { - std::string cpath; - Clean_Path(dirname, cpath); - Inode *ptr; - std::string dir = cpath.substr(0, cpath.size() - 1); - int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); - if (isPresent) - isPresent = MYFS_CreateDir(this->FileSystemObj, dir); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); + IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } IOStatus - S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) - { - - std::string cpath; - Clean_Path(fname, cpath); - Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - else - *file_size = ptr->FileSize; - return IOStatus::OK(); + S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) - { + IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetFileModificationTime(const std::string &fname, const IOOptions &options, uint64_t *file_mtime, - IODebugContext *dbg) - { + IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, - IODebugContext *dbg) - { - //*output_path = db_path; - return IOStatus::OK(); + IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) - { - // MYFS_DeletePath(this->FileSystemObj, fname); - std::cout<<"Delete file called"< *result, - IODebugContext *dbg) - { + IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) - { + IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. - IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) - { - return IOStatus::OK(); + IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } // Lock the specified file. Used to prevent concurrent access to @@ -722,71 +200,30 @@ namespace ROCKSDB_NAMESPACE // to go away. // // May create the named file if it does not already exist. - IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) - { - return IOStatus::OK(); + IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } IOStatus - S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) - { + S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) - { + IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) - { + IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::RenameFile(const std::string &src, const std::string &target, const IOOptions &options, - IODebugContext *dbg) - { - std::string cpath_target, cpath_src; - Clean_Path(src, cpath_src); - Clean_Path(target, cpath_target); - Inode *targetptr, *sourceptr; - // MYFS_DeletePath(this->FileSystemObj, target); - // FIXME: Logic for rename - // Change name in Inode - // Change in parent - - // verify if target exists - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); - if (isPresent) - { - // if it is not present - // rename the inode - std::string entityName; - Get_EntityName(cpath_target, entityName); - Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); - strcpy(sourceptr->EntityName, entityName.c_str()); - LookupMap_Delete(this->FileSystemObj, cpath_src); - - LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); - // rename the entity in the parent - std::string srcEntityName; - Get_EntityName(cpath_src, srcEntityName); - - std::string parentPath; - Get_ParentPath(cpath_target, parentPath); - int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, srcEntityName); - if (parentUpdated) - return IOStatus::IOError(__FUNCTION__); - } - else - { - } - return IOStatus::OK(); + IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetChildrenFileAttributes(const std::string &dir, const IOOptions &options, - std::vector *result, IODebugContext *dbg) - { + std::vector *result, IODebugContext *dbg) { return FileSystem::GetChildrenFileAttributes(dir, options, result, dbg); } @@ -798,19 +235,8 @@ namespace ROCKSDB_NAMESPACE // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, - IODebugContext *dbg) - { - std::string cpath; - Get_ParentPath(dir, cpath); - Inode *ptr; - - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - return IOStatus::IOError(__FUNCTION__); - uint32_t err = Load_Children(this->FileSystemObj, ptr, "", result, true); - if (err) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); + IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } // Returns OK if the named file exists. @@ -818,341 +244,13 @@ namespace ROCKSDB_NAMESPACE // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered - IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) - { - Inode *ptr; - std::string cpath; - Clean_Path(fname, cpath); - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) - return IOStatus::NotFound(); - return IOStatus::OK(); + IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { + return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) - { + std::unique_ptr *result, IODebugContext *dbg) { return IOStatus::IOError(__FUNCTION__); } - - int load_nth_indirect_block(MYFS *FSObj, uint32_t n, uint64_t indirect_lba, Indirect_ptr **ptr) - { - for (int i = 0; i < n; i++) - Load_From_NVM(FSObj, (*ptr)->Indirect_ptr_lbas, *ptr, 4096); - } - - int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) - { - uint32_t curr = offset / 4096, end = (offset+size) / 4096; - uint64_t if_dirty_addr; - uint64_t *data_block_lba_ptr, next_indirect_block_addr; - uint32_t no_of_data_block_ptrs; - Indirect_ptr *iptr = NULL; - // Load the direct ptr - if (curr < 480) - { - // In Inode block itself - data_block_lba_ptr = ptr->Direct_data_lbas; - no_of_data_block_ptrs = 480; - next_indirect_block_addr = ptr->Indirect_ptr_lbas; - if_dirty_addr = 4096 + (ptr->Inode_no * INODE_SIZE); - } - else - { - curr -= 480; - int nth_indirect = curr / 510; - //What if ptr->Indirect_ptr_lba - iptr = (Indirect_ptr *)calloc(1, 4096); - if(ptr->Indirect_ptr_lbas == 0) { - ptr->Indirect_ptr_lbas = get_FreeDataBlock(FSObj); - } - - Load_From_NVM(FSObj, ptr->Indirect_ptr_lbas, iptr, 4096); - for (int i = 0; i < nth_indirect; i++) - Load_From_NVM(FSObj, iptr->Indirect_ptr_lbas, iptr, 4096); - - data_block_lba_ptr = iptr->Direct_data_lbas; - next_indirect_block_addr = iptr->Indirect_ptr_lbas; - no_of_data_block_ptrs = 510; - curr = curr % 510; - if_dirty_addr = iptr->Current_addr; - } - - uint64_t addr; - for (int i = 0; i <= end; i++) - { - addr = *(data_block_lba_ptr + curr); - if (!addr) - { - addr = get_FreeDataBlock(FSObj); - *(data_block_lba_ptr+curr) = addr; - } - addresses->push_back(addr); - curr++; - - if (curr == no_of_data_block_ptrs) - { - if (!next_indirect_block_addr) - { - // If no indirect block ptr, create one and store to mem - next_indirect_block_addr = get_FreeDataBlock(FSObj); - if (iptr == NULL) - { - ptr->Indirect_ptr_lbas = next_indirect_block_addr; - Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); - - } - else - { - iptr->Indirect_ptr_lbas = next_indirect_block_addr; - Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); - free(iptr); - } - iptr = (Indirect_ptr *)calloc(1, 4096); - iptr->Current_addr = next_indirect_block_addr; - } - else - { - if (iptr == NULL) - iptr = (Indirect_ptr *)calloc(1, 4096); - - Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); - } - next_indirect_block_addr = iptr->Indirect_ptr_lbas; - no_of_data_block_ptrs = 510; - data_block_lba_ptr = iptr->Direct_data_lbas; - curr = 0; - } - } - - // Store dirty block to NVM - if (iptr == NULL) - { - // addresses->push_back(); - Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); - } - else - { - Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); - } - - free(iptr); - return 0; - } - - // MYFS_File definition - MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) - { - this->FSObj = FSObj; - Get_Path_Inode(FSObj, filePath, &(this->ptr)); - this->curr_read_offset = 0; - } - - int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) - { - if (ptr->FileSize < offset + size) { - if(offset >= ptr->FileSize) - return 0; - size = ptr->FileSize - offset; - } - - std::vector addresses_to_read; - int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); - if (err) - return -1; - - char *readD = (char *)calloc(addresses_to_read.size(), 4096); - for (int i = 0; i < addresses_to_read.size(); i++) - Load_From_NVM(this->FSObj, addresses_to_read.at(i), readD + (i * 4096), 4096); - - int smargin = offset % 4096; - memcpy(data, readD + smargin, size); - free(readD); - return size; - } - - int MYFS_File::Read(uint64_t size, char *data) - { - // Check with file size - int sizeW = this->PRead(this->curr_read_offset, size, data); - this->curr_read_offset += sizeW; - return sizeW; - } - - int MYFS_File::Seek(uint64_t offset) - { - if (ptr->FileSize < this->curr_read_offset + offset) - return -1; - this->curr_read_offset += offset; - return 0; - } - - int MYFS_File::Truncate(uint64_t size) - { - // TODO: Free Data Block - this->ptr->FileSize = size; - return 0; - } - - int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) - { - std::vector addresses_to_read; - int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); - if (err) - return -1; - - // Do read-modify-update cycle if smargin is present on 1st address. - int smargin = offset % 4096; - char *buffer = (char *)calloc(addresses_to_read.size(), 4096); - if (smargin) - Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); - - memcpy(buffer + smargin, data, size); - for (int i = 0; i < addresses_to_read.size(); i++) - Store_To_NVM(this->FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); - - // Update file size - this->ptr->FileSize = offset + size; - free(buffer); - return 0; - } - - int MYFS_File::Append(uint64_t size, char *data) - { - return this->PAppend(this->ptr->FileSize, size, data); - } - - uint64_t MYFS_File::GetFileSize() - { - return this->ptr->FileSize; - } - - int MYFS_File::Close() - { - // Flush Inode changes to Disk - } - - // Def of MYFS_SequentialFile - MYFS_SequentialFile::MYFS_SequentialFile(std::string fpath, MYFS *FSObj) - { - this->fp = new MYFS_File(fpath, FSObj); - } - - IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) - { - - int sizeW = this->fp->Read(n, scratch); - *result = Slice(scratch, sizeW); - return IOStatus::OK(); - } - - // IOStatus MYFS_SequentialFile::PositionedRead(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, - // char *scratch, IODebugContext *dbg) - // { - // int err = this->fp->PRead(offset, n, scratch); - // if (err) - // return IOStatus::IOError(__FUNCTION__); - // *result = Slice(scratch, n); - // return IOStatus::OK(); - // } - - IOStatus MYFS_SequentialFile::Skip(uint64_t n) - { - int err = this->fp->Seek(n); - if (err) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); - } - - // Def MYFS_RandomAccessFile - MYFS_RandomAccessFile::MYFS_RandomAccessFile(std::string fname, MYFS *FSObj) - { - this->fp = new MYFS_File(fname, FSObj); - } - - IOStatus MYFS_RandomAccessFile::Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, - IODebugContext *dbg) const - { - int sizeW = this->fp->PRead(offset, n, scratch); - *result = Slice(scratch, sizeW); - return IOStatus::OK(); - } - - // Def MYFS_WritableFile - MYFS_WritableFile::MYFS_WritableFile(std::string fname, MYFS *FSObj) - { - this->fp = new MYFS_File(fname, FSObj); - this->cache = false; - this->cacheSize = 0; - } - - IOStatus MYFS_WritableFile::Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) - { - int err = this->fp->Truncate(size); - if (err) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); - } - - IOStatus MYFS_WritableFile::ClearCache() { - if(!this->cache) - return IOStatus::OK(); - this->cache = false; - int err = this->fp->Append(this->cacheSize, this->cacheData); - if (err) - return IOStatus::IOError(__FUNCTION__); - free(this->cacheData); - this->cacheSize = 0; - return IOStatus::OK(); - } - - IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) - { - - char *block = (char *)data.data(); - uint64_t size = data.size(); - if(this->cache) { - //Append to cache - char *tmp = (char *)calloc(1, this->cacheSize+size); - memcpy(tmp, this->cacheData, this->cacheSize); - memcpy(tmp+this->cacheSize, block, size); - free(this->cacheData); - this->cacheData = tmp; - this->cacheSize += size; - //If size > 4096 clear cache - if(this->cacheSize >= 4096*200) - this->ClearCache(); - return IOStatus::OK(); - } else if(size < 4096*200) { - //Append to cache - this->cache = true; - this->cacheData = (char *)calloc(1, size); - memcpy(this->cacheData, block, size); - this->cacheSize = size; - return IOStatus::OK(); - } - int err = this->fp->Append(size, block); - if (err) - return IOStatus::IOError(__FUNCTION__); - return IOStatus::OK(); - } - - // MYFS_Directory::MYFS_Directory(std::string name) { - // std::cout<<"For checl"<fp->PAppend(offset, size, block); - std::cout<<"PAppend size : "< #include -#define LOOKUP_MAP_SIZE 1000 -#define MAX_INODE_COUNT 255 -#define INODE_SIZE 4096 -#define SUPER_BLOCK_SIZE 4096 -#define STRINGENCODE 31 -#define DATA_BLOCKS_OFFSET 256 -namespace ROCKSDB_NAMESPACE -{ - - struct Inode - { - uint32_t Inode_no; - char EntityName[235]; - bool IsDir; - uint64_t FileSize; - uint64_t Indirect_ptr_lbas; - uint64_t Direct_data_lbas[480]; - }; - - struct mapEntries - { - char id[1000]; - Inode *ptr; - mapEntries *chain; - }; - - struct Indirect_ptr - { - uint64_t Current_addr; - uint64_t Direct_data_lbas[510]; - uint64_t Indirect_ptr_lbas; - }; - - struct MYFS_DirData - { - char EntityName[252]; - uint32_t InodeNum; - }; - - struct MYFS_Dir - { - MYFS_DirData Entities[16]; - }; - - struct MYFS - { - mapEntries *LookupCache[LOOKUP_MAP_SIZE]; // Map type to void ptrs; - bool InodeBitMap[MAX_INODE_COUNT]; - bool *DataBitMap; - uint32_t InodePtr; - - uint64_t DataBlockPtr; - uint64_t DataBlockMax; - - uint64_t DataBlockCount; - uint64_t FileSystemCapacity; - uint32_t LogicalBlockSize; - Inode *rootEntry; - user_zns_device *zns; - }; - - /* - int Load_From_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); - int Store_To_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); - void Get_ParentPath(std::string path, std::string &parent); - void Get_EntityName(std::string path, std::string &entityName); - //void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); - // int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); - int LookupMap_HashFunction(void *data); - */ - - - class MYFS_File - { - private: - struct Inode *ptr; - MYFS *FSObj; - uint64_t curr_read_offset; - void *current_ptr; +namespace ROCKSDB_NAMESPACE { - public: - MYFS_File(std::string filePath, MYFS *FSObj); - virtual ~MYFS_File() = default; - int Read(uint64_t size, char *data); - int PRead(uint64_t offset, uint64_t size, char *data); - int Seek(uint64_t offset); - int Truncate(uint64_t size); - int Append(uint64_t size, char *data); - int PAppend(uint64_t offset, uint64_t size, char *data); - uint64_t GetFileSize(); - int Close(); - }; - - /* - *Creates read only MYFS_File object - */ - class MYFS_SequentialFile : public FSSequentialFile - { - private: - MYFS_File *fp; - - public: - MYFS_SequentialFile(std::string filePath, MYFS *FSObj); - virtual ~MYFS_SequentialFile(){delete this->fp;} - virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, - char *scratch, IODebugContext *dbg)override; - - virtual IOStatus Skip(uint64_t n) override; - // virtual IOStatus PositionedRead(uint64_t offset, size_t n, - // const IOOptions &opts, Slice *result, - // char *scratch, IODebugContext *dbg) override; - // virtual IOStatus InvalidateCache(size_t offset, size_t length) override - // { - // return IOStatus::OK(); - // }; - // virtual bool use_direct_io() const override { return true; } - // virtual size_t GetRequiredBufferAlignment() const override { return 4096; } - }; - - class MYFS_RandomAccessFile : public FSRandomAccessFile - { - private: - MYFS_File *fp; - - public: - MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); - virtual ~MYFS_RandomAccessFile(){delete this->fp;} - virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, - Slice *result, char *scratch, IODebugContext *dbg) const override; - /* - virtual IOStatus MultiRead(FSReadRequest *reqs, size_t num_reqs, - const IOOptions &options, - IODebugContext *dbg) {std::cout<<"MULTIREAD"<ClearCache();delete this->fp;} - virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, - IODebugContext *dbg) override; - virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) {return IOStatus::OK();}; - virtual IOStatus Append(const Slice &data, const IOOptions &opts, - IODebugContext *dbg) override; - virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } - virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } - /* - virtual IOStatus Append(const Slice &data, const IOOptions &opts, - const DataVerificationInfo & /* verification_info , - IODebugContext *dbg) override - { - return Append(data, opts, dbg); - } - virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, - const IOOptions &opts, - IODebugContext *dbg) override; - virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, - const IOOptions &opts, const DataVerificationInfo & /* verification_info, - IODebugContext *dbg) override - { - return PositionedAppend(data, offset, opts, dbg); - } - - virtual IOStatus Fsync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } - virtual bool IsSyncThreadSafe() const { return false; } - virtual bool use_direct_io() const override { return true; } - virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {} - virtual uint64_t GetFileSize(const IOOptions &opts, - IODebugContext *dbg) override {std::cout<<"Calling this module"<fp->GetFileSize();} - virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); } - virtual size_t GetRequiredBufferAlignment() const override { return 4096; } - */ - }; - - class MYFS_Directory : public FSDirectory - { - private: - MYFS *fp; - public: - MYFS_Directory(MYFS *FSObj){} - virtual ~MYFS_Directory(){} - virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { - return IOStatus::OK(); - } - }; - - class S2FileSystem : public FileSystem - { + class S2FileSystem : public FileSystem { public: // No copying allowed S2FileSystem(std::string uri, bool debug); - S2FileSystem(const S2FileSystem &) = delete; + S2FileSystem(const S2FileSystem&) = delete; virtual ~S2FileSystem(); IOStatus IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) override; @@ -291,9 +91,9 @@ namespace ROCKSDB_NAMESPACE GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg); - IOStatus DeleteFile(const std::string &fname, - const IOOptions &options, - IODebugContext *dbg); + IOStatus DeleteFile(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg); IOStatus NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, @@ -331,8 +131,7 @@ namespace ROCKSDB_NAMESPACE struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; - struct MYFS *FileSystemObj; }; } -#endif // STOSYS_PROJECT_S2FILESYSTEM_H +#endif //STOSYS_PROJECT_S2FILESYSTEM_H \ No newline at end of file diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index 94a0e86..c57e028 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(true){ + if(false){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { @@ -50,4 +50,3 @@ namespace ROCKSDB_NAMESPACE { return ret_fs->get(); }); } - From 758e5ddf1b60cf74a55b5dbf5ebc945ed75fc5c9 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 14 Oct 2022 12:40:42 +0000 Subject: [PATCH 063/101] clean code --- src/m1/device.cpp | 304 +++++++++++++++++++----------------- src/m1/device.h | 42 +++-- src/m1/m1.cpp | 117 +++++++------- src/m45-rocksdb/m45_main.cc | 10 +- 4 files changed, 247 insertions(+), 226 deletions(-) diff --git a/src/m1/device.cpp b/src/m1/device.cpp index aa15fd4..8369aa8 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -20,41 +20,41 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include -#include #include #include +#include +#include +#include +#include +#include #include #include #include "device.h" #include "../common/nvmeprint.h" // Examples lifted from, https://github.com/linux-nvme/libnvme/blob/667334ff8c53dbbefa51948bbe2e086624bf4d0d/test/cpp.cc -int count_and_show_all_nvme_devices() { - nvme_root_t r; +int count_and_show_all_nvme_devices() +{ nvme_host_t h; nvme_subsystem_t s; nvme_ctrl_t c; nvme_path_t p; nvme_ns_t n; int count = 0; - - r = nvme_scan(nullptr); + nvme_root_t r = nvme_scan(nullptr); if (!r) return -1; - nvme_for_each_host(r, h) { nvme_for_each_subsystem(h, s) { std::cout << nvme_subsystem_get_name(s) << " - NQN=" << nvme_subsystem_get_nqn(s) - << "\n"; + << std::endl; nvme_subsystem_for_each_ctrl(s, c) { std::cout << " `- " << nvme_ctrl_get_name(c) << " " << nvme_ctrl_get_transport(c) << " " << nvme_ctrl_get_address(c) << " " << nvme_ctrl_get_state(c) - << "\n"; + << std::endl; nvme_ctrl_for_each_ns(c, n) { std::cout << " `- " << nvme_ns_get_name(n) @@ -62,66 +62,76 @@ int count_and_show_all_nvme_devices() { << nvme_ns_get_lba_size(n) << " lba max:" << nvme_ns_get_lba_count(n) - << "\n"; + << std::endl; } nvme_ctrl_for_each_path(c, p) { std::cout << " `- " << nvme_path_get_name(p) << " " << nvme_path_get_ana_state(p) - << "\n"; + << std::endl; } - count++; + ++count; } } } - std::cout << "\n"; + std::cout << std::endl; nvme_free_tree(r); return count; } extern "C" { -int scan_and_identify_zns_devices(struct ss_nvme_ns *list){ - int ret; +int scan_and_identify_zns_devices(ss_nvme_ns *list) +{ int ns_counter = 0; - nvme_root_t root; nvme_host_t h; nvme_subsystem_t subsystem; nvme_ctrl_t controller; nvme_ns_t nspace; - nvme_id_ns ns{}; - - root = nvme_scan(nullptr /* for now the config file is NULL */); - if (!root){ - printf("nvme_scan call failed with errno %d , null pointer returned in the scan call\n", -errno); + nvme_id_ns ns; + nvme_root_t root = nvme_scan(nullptr /* for now the config file is NULL */); + if (!root) { + std::cout << "nvme_scan call failed with errno " + << -errno + << " , null pointer returned in the scan call" + << std::endl; return -1; } nvme_for_each_host(root, h) { nvme_for_each_subsystem(h, subsystem) { - printf("root (%d) |- name: %s sysfs_dir %s subsysqn %s \n", ns_counter, - nvme_subsystem_get_name(subsystem), - nvme_subsystem_get_sysfs_dir(subsystem), nvme_subsystem_get_nqn(subsystem)); + std::cout << "root (" << ns_counter + << ") |- name: " << nvme_subsystem_get_name(subsystem) + << " sysfs_dir " + << nvme_subsystem_get_sysfs_dir(subsystem) + << " subsysqn " << nvme_subsystem_get_nqn(subsystem) + << std::endl; nvme_subsystem_for_each_ctrl(subsystem, controller) { - printf("\t|- controller : name %s (more to follow) \n ", nvme_ctrl_get_name(controller)); + std::cout << "\t|- controller : name " + << nvme_ctrl_get_name(controller) + << " (more to follow)" << std::endl; nvme_ctrl_for_each_ns(controller, nspace) { - printf("\t\t|- namespace : name %s and command set identifier (csi) is %d (= 0 NVMe, 2 = ZNS), more to follow) \n ", - nvme_ns_get_name(nspace), nvme_ns_get_csi(nspace)); + std::cout << "\t\t|- namespace : name " + << nvme_ns_get_name(nspace) + << " and command set identifier (csi) is " + << nvme_ns_get_csi(nspace) + << " (= 0 NVMe, 2 = ZNS), more to follow)" + << std::endl; list[ns_counter].ctrl_name = strdup(nvme_ns_get_name(nspace)); - if (nvme_ns_get_csi(nspace) == NVME_CSI_ZNS) { + if (nvme_ns_get_csi(nspace) == NVME_CSI_ZNS) list[ns_counter].supports_zns = true; - } else{ + else list[ns_counter].supports_zns = false; - } // for convenience - nvme_get_nsid(nvme_ns_get_fd(nspace), &list[ns_counter].nsid); - ret = nvme_ns_identify(nspace, &ns); + nvme_get_nsid(nvme_ns_get_fd(nspace), + &list[ns_counter].nsid); + int ret = nvme_ns_identify(nspace, &ns); if (ret) { - printf("ERROR : failed to identify the namespace with %d and errno %d \n", ret, errno); + std::cout << "ERROR : failed to identify the namespace with " + << ret << " and errno " << errno << std::endl; return ret; } - //nvme_show_id_ns(&ns); - ns_counter++; + ++ns_counter; } } } @@ -130,39 +140,38 @@ int scan_and_identify_zns_devices(struct ss_nvme_ns *list){ return 0; } -int show_zns_zone_status(int fd, int nsid, struct zone_to_test *ztest){ +int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztest) +{ // ZNS specific data structures as specified in the TP 4053 - struct nvme_zns_id_ns s_zns_nsid{}; - struct nvme_zns_id_ctrl s_zns_ctrlid{}; - struct nvme_zone_report zns_report{}; - struct nvme_zns_desc *desc = nullptr, *_ztest = nullptr; // standard NVMe structures - struct nvme_id_ns s_nsid{}; - int ret; - uint64_t num_zones; - // lets first get the NVMe ns identify structure (again), we need some information from it to complement the + nvme_id_ns s_nsid; + // lets first get the NVMe ns identify structure (again), + // we need some information from it to complement the // information present in the ZNS ns identify structure - ret = nvme_identify_ns(fd, nsid, &s_nsid); - if(ret != 0){ - fprintf(stderr, "failed to identify NVMe namespace, ret %d \n", ret); + int ret = nvme_identify_ns(fd, nsid, &s_nsid); + if (ret) { + std::cerr << "failed to identify NVMe namespace, ret " + << ret << std::endl; return ret; } // see figure 8, section 3.1.1 in the ZNS specification + nvme_zns_id_ns s_zns_nsid; ret = nvme_zns_identify_ns(fd, nsid, &s_zns_nsid); - if (ret != 0) { - fprintf(stderr, "failed to identify ZNS namespace, ret %d \n", ret); + if (ret) { + std::cerr << "failed to identify ZNS namespace, ret " + << ret << std::endl; return -ret; } ss_nvme_show_zns_id_ns(&s_zns_nsid, &s_nsid); - // 3.1.2, figure 10 in the ZNS specification + nvme_zns_id_ctrl s_zns_ctrlid; ret = nvme_zns_identify_ctrl(fd, &s_zns_ctrlid); - if (ret != 0) { - fprintf(stderr, "failed to identify ZNS controller, ret %d \n", ret); + if (ret) { + std::cerr << "failed to identify ZNS controller, ret " + << ret << std::endl; return ret; } ss_nvme_show_zns_id_ctrl(&s_zns_ctrlid); - // now we send the management related commands - see section 4.3 and 4.4 in TP 4053 // we are now trying to retrieve the number of zones with other information present in the zone report // the following function takes arguments that are required to filled the command structure as shown @@ -174,152 +183,157 @@ int show_zns_zone_status(int fd, int nsid, struct zone_to_test *ztest){ // Pay attention what is being passed in the zns_report pointer and size, I am passing a structure // _WITHOUT_ its entries[] field initialized because we do not know how many zones does this namespace // hence we first get the number of zones, and then try again to get the full report - ret = nvme_zns_mgmt_recv(fd, nsid, 0, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - 0, sizeof(zns_report), (void *)&zns_report); - if(ret != 0) { - fprintf(stderr, "failed to report zones, ret %d \n", ret); + nvme_zone_report zns_report; + ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, false, + sizeof(zns_report), &zns_report); + if (ret) { + std::cerr << "failed to report zones, ret " << ret << std::endl; return ret; } // see figures 37-38-39 in section 4.4.1 - num_zones = le64_to_cpu(zns_report.nr_zones); + uint64_t num_zones = le64_to_cpu(zns_report.nr_zones); printf("nr_zones:%" PRIu64"\n", num_zones); // lets get more information about the zones - the total metadata size would be // see the figure 37 in the ZNS description // so we allocated an structure with a flat memory and point the zone_reports to it // An alternate strategy would have been just allocate a 4kB page and get some numbers of zone reports whatever can // fit in that in a loop. - uint64_t total_size = sizeof(zns_report) + (num_zones * sizeof(struct nvme_zns_desc)); - char *zone_reports = (char*) calloc (1, total_size); - ret = nvme_zns_mgmt_recv(fd, nsid, 0, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - 1, total_size, (void *)zone_reports); - if(ret !=0) { - fprintf(stderr, "failed to report zones, ret %d \n", ret); + uint64_t total_size = sizeof(zns_report) + + num_zones * sizeof(nvme_zns_desc); + std::unique_ptr zone_reports(new char[total_size]()); + ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, true, + total_size, zone_reports.get()); + if (ret) { + std::cerr << "failed to report zones, ret " << ret << std::endl; return ret; } - desc = ((struct nvme_zone_report*) zone_reports)->entries; - num_zones = le64_to_cpu(((struct nvme_zone_report*) zone_reports)->nr_zones); + nvme_zns_desc *desc = ((nvme_zone_report *)zone_reports.get())->entries; + num_zones = le64_to_cpu(((nvme_zone_report *)zone_reports.get())->nr_zones); // otherwise we got all our reports, check again - printf("With the reports we have num_zones %lu (for which data transfer happened) \n", num_zones); - for(uint64_t i = 0; i < num_zones; i++){ + std::cout << "With the reports we have num_zones " << num_zones + << " (for which data transfer happened)" << std::endl; + nvme_zns_desc *_ztest = nullptr; + for (uint64_t i = 0; i < num_zones; ++i) { // see figure 39 for description of these fields - printf("\t SLBA: 0x%-8" PRIx64" WP: 0x%-8" PRIx64" Cap: 0x%-8" PRIx64" State: %-12s Type: %-14s Attrs: 0x%-x\n", - (uint64_t)le64_to_cpu(desc->zslba), (uint64_t)le64_to_cpu(desc->wp), - (uint64_t)le64_to_cpu(desc->zcap), ss_zone_state_to_string(desc->zs >> 4), - ss_zone_type_to_string(desc->zt), desc->za); - if(_ztest == nullptr && (desc->zs >> 4) == NVME_ZNS_ZS_EMPTY){ - // pick the first zone which is empty to do I/O experiments - nothing clever here + std::cout << "\t SLBA: 0x%-8" << le64_to_cpu(desc->zslba) + << " WP: 0x%-8" << le64_to_cpu(desc->wp) + << " Cap: 0x%-8" << le64_to_cpu(desc->zcap) + << " State: " << std::setw(12) << std::setfill(' ') + << ss_zone_state_to_string(desc->zs >> 4) + << " Type: " << std::setw(14) << std::setfill(' ') + << ss_zone_type_to_string(desc->zt) + << " Attrs: 0x" << desc->za << std::endl; + // pick the first zone which is empty to do I/O experiments + if (!_ztest && desc->zs >> 4 == NVME_ZNS_ZS_EMPTY) _ztest = desc; - } - desc++; + ++desc; } // if could be the case we did not find any empty zone - if(_ztest != nullptr){ + if (_ztest) { ret = 0; - memcpy(&ztest->desc, _ztest, sizeof(*_ztest)); + memcpy(&ztest.desc, _ztest, sizeof(*_ztest)); } else { - printf("Error: I could not find a free empty zone to test, perhaps reset the zones with: sudo nvme zns reset-zone -a /dev/nvme0n1 \n"); + std::cout << "Error: I could not find a free empty zone to test, \ +perhaps reset the zones with: sudo nvme zns reset-zone -a /dev/nvme0n1" + << std::endl; ret = -ENOENT; } // now we copy and return the zone values to do experiment on - free(zone_reports); return ret; } -int ss_nvme_device_io_with_mdts(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size, - uint64_t lba_size, uint64_t mdts_size, bool read){ + +int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, + unsigned long long slba, + void *buffer, unsigned buf_size, + const uint32_t &lba_size, + const uint32_t &mdts_size, const bool &read) +{ //FIXME: - int errno; - int current_lba = slba; - uint64_t completed_size = 0; - void *temp = malloc(mdts_size); - int iteration = 0; - errno = 0; - while((errno == 0) && (completed_size < buf_size)) { - uint64_t size = buf_size-completed_size < mdts_size ? buf_size-completed_size : mdts_size; - int no_blocks = floor(size/lba_size); - memcpy(temp, (char *)buffer+(iteration*mdts_size), size); - if (!read) - errno = ss_nvme_device_write(fd, nsid, current_lba, no_blocks, temp, size); - if (read) { - errno = ss_nvme_device_read(fd,nsid,current_lba,no_blocks,temp,size); - memcpy((char *)buffer+(iteration*mdts_size),temp, size); - } - completed_size += size; - current_lba += no_blocks; - iteration++; + while (buf_size) { + unsigned size = buf_size < mdts_size ? buf_size : mdts_size; + unsigned short no_blocks = size / lba_size; + if (read) + ss_nvme_device_read(fd, nsid, slba, no_blocks, buffer, size); + else + ss_nvme_device_write(fd, nsid, slba, no_blocks, buffer, size); + if (errno) + return errno; + slba += no_blocks; + buffer = (char *)buffer + size; + buf_size -= size; } - free(temp); return errno; } -int ss_nvme_device_read(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size) { +int ss_nvme_device_read(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size) +{ //FIXME: - int errno; - void *mbuffer = NULL; - uint16_t control = 0, apptag = 0, appmask = 0; - uint32_t dsmgmt = 0, reftag = 0; - long long mbuffer_size = 0; - errno = nvme_read(fd, nsid, slba, numbers-1, control, dsmgmt, reftag, apptag, appmask, (long long) buf_size, buffer, mbuffer_size, - mbuffer); + nvme_read(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr); ss_nvme_show_status(errno); return errno; } -int ss_nvme_device_write(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size) { +int ss_nvme_device_write(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size) +{ //FIXME: - int errno; - void *mbuffer = NULL; - uint16_t control = 0, apptag = 0, appmask = 0; - uint32_t dsmgmt = 0, reftag = 0; - long long mbuffer_size = 0; - - errno = nvme_write(fd, nsid, slba, numbers-1, control, dsmgmt, 0, reftag, apptag, appmask, (long long) buf_size, buffer, mbuffer_size, - mbuffer); + nvme_write(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr); ss_nvme_show_status(errno); return errno; } -int ss_zns_device_zone_reset(int fd, uint32_t nsid, uint64_t slba) { +int ss_zns_device_zone_reset(const int &fd, const unsigned &nsid, + const unsigned long long &slba) +{ //FIXME: - int errno; - errno = nvme_zns_mgmt_send(fd, nsid, slba, false, NVME_ZNS_ZSA_RESET, 0, NULL); + nvme_zns_mgmt_send(fd, nsid, slba, true, NVME_ZNS_ZSA_RESET, 0U, nullptr); ss_nvme_show_status(errno); return errno; } // this does not take slba because it will return that -int ss_zns_device_zone_append(int fd, uint32_t nsid, uint64_t zslba, int numbers, void *buffer, uint32_t buf_size, uint64_t *written_slba){ +int ss_zns_device_zone_append(const int &fd, const unsigned &nsid, + const unsigned long long &zslba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size, + unsigned long long *written_slba) +{ //FIXME: - int errno; - void *mbuffer = NULL; - errno = nvme_zns_append(fd, nsid, zslba, numbers-1, 0, - 0, 0, 0, buf_size, buffer, 0, mbuffer,(long long unsigned int *) written_slba); + nvme_zns_append(fd, nsid, zslba, numbers - 1, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr, written_slba); ss_nvme_show_status(errno); return errno; } -void update_lba(uint64_t &write_lba, const uint32_t lba_size, const int count){ +void update_lba(unsigned long long &write_lba, const int &count) +{ //assert(false); - write_lba = write_lba + count; + write_lba += count; } // see 5.15.2.2 Identify Controller data structure (CNS 01h) -uint64_t get_mdts_size(int fd){ - //FIXME: - uint64_t size, mpsmin; - struct nvme_id_ctrl ctrl; - +uint32_t get_mdts_size(const int &fd) +{ + //FIXME: + nvme_id_ctrl ctrl; //Identify MDTS - nvme_identify_ctrl(fd,&ctrl); - //printf("MDTS : %d\n",ctrl.mdts); - + nvme_identify_ctrl(fd, &ctrl); //Identify MPSMIN - void *regs; - regs = mmap(NULL,getpagesize(),PROT_READ,MAP_SHARED,fd,0); - mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs)); - - size = pow(2,mpsmin) * pow(2,ctrl.mdts); + void *regs = mmap(nullptr, getpagesize(), PROT_READ, MAP_SHARED, + fd, 0L); + uint32_t mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs)); + munmap(regs, getpagesize()); + uint32_t size = pow(2.0, mpsmin) * pow(2.0, ctrl.mdts); return size; } + } diff --git a/src/m1/device.h b/src/m1/device.h index e9c4997..f2cd9a9 100644 --- a/src/m1/device.h +++ b/src/m1/device.h @@ -36,28 +36,40 @@ struct ss_nvme_ns { }; struct zone_to_test { - struct nvme_zns_desc desc; - uint64_t lba_size_in_use; + nvme_zns_desc desc; + uint32_t lba_size_in_use; }; // these three function examples are given to you int count_and_show_all_nvme_devices(); -int scan_and_identify_zns_devices(struct ss_nvme_ns *list); -int show_zns_zone_status(int fd, int nsid, struct zone_to_test *ztest); - +int scan_and_identify_zns_devices(ss_nvme_ns *list); +int show_zns_zone_status(const int &fd, const unsigned &nsid, + zone_to_test &ztest); // these follow nvme specification I added ss_ prefix to avoid namespace collision with other lbnvme functions -int ss_nvme_device_io_with_mdts(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size, - uint64_t lba_size, uint64_t mdts_size, bool read); -int ss_nvme_device_read(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size); -int ss_nvme_device_write(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size); - +int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, + unsigned long long slba, + void *buffer, unsigned buf_size, + const uint32_t &lba_size, + const uint32_t &mdts_size, const bool &read); +int ss_nvme_device_read(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size); +int ss_nvme_device_write(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size); // these are ZNS specific commands -int ss_zns_device_zone_reset(int fd, uint32_t nsid, uint64_t slba); -int ss_zns_device_zone_append(int fd, uint32_t nsid, uint64_t zslba, int numbers, void *buffer, uint32_t buf_size, - uint64_t *written_slba); +int ss_zns_device_zone_reset(const int &fd, const unsigned &nsid, + const unsigned long long &slba); +int ss_zns_device_zone_append(const int &fd, const unsigned &nsid, + const unsigned long long &zslba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size, + unsigned long long *written_slba); +void update_lba(unsigned long long &write_lba, const int &count); +uint32_t get_mdts_size(const int &fd); -void update_lba(uint64_t &write_lba, const uint32_t lba_size, const int count); -uint64_t get_mdts_size(int fd); } #endif //STOSYS_PROJECT_DEVICE_H diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index 6e914c0..a3ee329 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -20,21 +20,21 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include #include #include -#include - -#include #include -#include - +#include +#include #include "device.h" #include "../common/nvmeprint.h" #include "../common/utils.h" extern "C" { -static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest){ +static int test1_lba_io_test(const int &zfd, const unsigned &nsid, zone_to_test *ztest) +{ struct nvme_id_ns *s_nsid = nullptr; int ret; uint64_t test_lba_address = le64_to_cpu(ztest->desc.zslba); @@ -81,11 +81,11 @@ static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) // step 5: read all 5 and match the pattern do { // step 1: reset the whole zone - uint64_t write_lba = le64_to_cpu(ztest->desc.zslba), zone_slba = le64_to_cpu(ztest->desc.zslba); - uint64_t returned_slba = -1; + unsigned long long write_lba = le64_to_cpu(ztest->desc.zslba), zone_slba = le64_to_cpu(ztest->desc.zslba); + unsigned long long returned_slba = -1; ret = ss_zns_device_zone_reset(zfd, nsid, zone_slba); assert(ret == 0); - printf("zone at 0x%lx is reset successfully \n", zone_slba); + printf("zone at 0x%llx is reset successfully \n", zone_slba); // step 2: write 2x blocks, hence 2x the buffer size char *w_pattern2 = (char *) calloc (2 , ztest->lba_size_in_use); // I am writing these patterns in two stages so that I can test them independently. @@ -95,17 +95,17 @@ static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) ret = ss_nvme_device_write(zfd, nsid, le64_to_cpu(ztest->desc.zslba), 2, w_pattern2, 2 * ztest->lba_size_in_use); assert(ret == 0); printf("zone is written 2x successfully \n"); - update_lba(write_lba, ztest->lba_size_in_use, 2); + update_lba(write_lba, 2); // step 3: append 2x LBA blocks ret = ss_zns_device_zone_append(zfd, nsid, zone_slba, 2, w_pattern2, 2 * ztest->lba_size_in_use, &returned_slba); assert(ret == 0); - printf("zone is APPENDED 2x successfully, returned pointer is at %lx (to match %lx) \n", returned_slba, write_lba); + printf("zone is APPENDED 2x successfully, returned pointer is at %llx (to match %llx) \n", returned_slba, write_lba); // match that the returned pointer - which should be the original write ptr location. // returned pointer is where the data is appended (not where the write pointer _is_) - assert(returned_slba == write_lba); + assert(returned_slba == write_lba); // move the returned pointer to the +2 LBAs - we can now use the returned pointer - update_lba(returned_slba, ztest->lba_size_in_use, 2); + update_lba(returned_slba, 2); // step 4: write the 5th 1x LBA using the returned LBA from the append ret = ss_nvme_device_write(zfd, nsid, returned_slba, 1, w_pattern, ztest->lba_size_in_use); assert(ret == 0); @@ -133,7 +133,8 @@ static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) return ret; } -static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest){ +static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) +{ uint64_t zone_size_in_bytes = ztest->lba_size_in_use * ztest->desc.zcap; uint64_t zslba = le64_to_cpu(ztest->desc.zslba); uint64_t MDTS = get_mdts_size(zfd); @@ -150,7 +151,7 @@ static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test printf("Error: zone rest on 0x%lx failed, ret %d \n", zslba, ret); goto done; } - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, ztest->desc.zcap, data, zone_size_in_bytes, + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data, zone_size_in_bytes, ztest->lba_size_in_use, MDTS, false); @@ -160,7 +161,7 @@ static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test } // now read the zone bzero(data, zone_size_in_bytes); - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, ztest->desc.zcap, data, zone_size_in_bytes, + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data, zone_size_in_bytes, ztest->lba_size_in_use, MDTS, true); @@ -177,86 +178,80 @@ static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test return ret; } -int main() { - int ret, num_devices, fd, t1, t2; - uint32_t nsid; - struct ss_nvme_ns *my_devices, *zns_device; - struct nvme_id_ns ns{}; - struct zone_to_test ztest{}; - printf("============================================================== \n"); - printf("Welcome to M1. This is lot of ZNS/NVMe exploration \n"); - printf("============================================================== \n"); +int main() +{ + printf("==============================================================\n"); + printf("Welcome to M1. This is lot of ZNS/NVMe exploration\n"); + printf("==============================================================\n"); // scan all NVMe devices in the system - just like nvme list command - ret = count_and_show_all_nvme_devices(); - if(ret < 0){ - printf("the host device scans failed, %d \n", ret); + int ret = count_and_show_all_nvme_devices(); + if (ret < 0) { + printf("the host device scans failed, %d\n", ret); return ret; } // now we are going to allocate scan the returned number of devices to identify a ZNS device - num_devices = ret; - printf("total number of devices in the system is %d \n", num_devices); - if(num_devices == 0){ - printf("Error: failed to open any device, zero devices in the system? \n"); + int num_devices = ret; + printf("total number of devices in the system is %d\n", num_devices); + if (!num_devices) { + printf("Error: failed to open any device, zero devices in the system?\n"); return -ENODEV; } - my_devices = (struct ss_nvme_ns *) calloc (num_devices, sizeof(*my_devices)); - if(!my_devices){ - printf("failed calloc, -ENOMEM \n"); - return -12; - } - ret = scan_and_identify_zns_devices(my_devices); - if(ret < 0){ + std::unique_ptr my_devices(new ss_nvme_ns[num_devices]()); + ret = scan_and_identify_zns_devices(my_devices.get()); + if (ret < 0) { printf("scanning of the devices failed %d\n", ret); return ret; } - for(int i = 0; i < num_devices; i++){ - printf("namespace: %s and zns %s \n", my_devices[i].ctrl_name, (my_devices[i].supports_zns ? "YES" : "NO")); - if(my_devices[i].supports_zns) { - // with this we will just pick the last ZNS device to work with + ss_nvme_ns *zns_device = nullptr; + for (int i = 0; i < num_devices; ++i) { + printf("namespace: %s and zns %s\n", my_devices[i].ctrl_name, (my_devices[i].supports_zns ? "YES" : "NO")); + // with this we will just pick the last ZNS device to work with + if (my_devices[i].supports_zns) zns_device = &my_devices[i]; - } } - printf("Opening the device at %s \n", zns_device->ctrl_name); - fd = nvme_open(zns_device->ctrl_name); - if(fd < 0){ - printf("device %s opening failed %d errno %d \n", zns_device->ctrl_name, fd, errno); + printf("Opening the device at %s\n", zns_device->ctrl_name); + int fd = nvme_open(zns_device->ctrl_name); + if (fd < 0) { + printf("device %s opening failed %d errno %d\n", zns_device->ctrl_name, fd, errno); return -fd; } - printf("device %s opened successfully %d \n", zns_device->ctrl_name, fd); + printf("device %s opened successfully %d\n", zns_device->ctrl_name, fd); // now try to retrieve the NVMe namespace details - step 1 get the id + unsigned nsid = 0U; ret = nvme_get_nsid(fd, &nsid); - if(ret != 0){ - printf("ERROR: failed to retrieve the nsid %d \n", ret); + if (ret) { + printf("ERROR: failed to retrieve the nsid %d\n", ret); return ret; } // with the id now we can query the identify namespace - see figure 249, section 5.15.2 in the NVMe specification + nvme_id_ns ns; ret = nvme_identify_ns(fd, nsid, &ns); - if(ret){ - printf("ERROR: failed to retrieve the nsid %d \n", ret); + if (ret) { + printf("ERROR: failed to retrieve the nsid %d\n", ret); return ret; } ss_nvme_show_id_ns(&ns); printf("number of LBA formats? %d (a zero based value) \n", ns.nlbaf); // extract the in-use LBA size, it could be the case that the device supports multiple LBA size + zone_to_test ztest; ztest.lba_size_in_use = 1 << ns.lbaf[(ns.flbas & 0xf)].ds; - printf("the LBA size is %lu bytes \n", ztest.lba_size_in_use); + printf("the LBA size is %u bytes \n", ztest.lba_size_in_use); // this function shows the zone status and then return the first empty zone to do experiments on in ztest - ret = show_zns_zone_status(fd, nsid, &ztest); - if ( ret != 0) { + ret = show_zns_zone_status(fd, nsid, ztest); + if (ret) { printf("failed to get a workable zone, ret %d \n", ret); return ret; } - t1 = test1_lba_io_test(fd, nsid, &ztest); - t2 = test2_zone0_full_io_test(fd, nsid, &ztest); + int t1 = test1_lba_io_test(fd, nsid, &ztest); + int t2 = test2_zone0_full_io_test(fd, nsid, &ztest); printf("====================================================================\n"); printf("Milestone 1 results \n"); printf("Test 1 (read, write, append, reset) : %s \n", (t1 == 0 ? " Passed" : " Failed")); printf("Test 2 (Large zone read, write) : %s \n", (t2 == 0 ? " Passed" : " Failed")); printf("====================================================================\n"); - for(int i = 0; i < num_devices; i++) { + for(int i = 0; i < num_devices; ++i) free(my_devices[i].ctrl_name); - } - free(my_devices); return 0; } + } diff --git a/src/m45-rocksdb/m45_main.cc b/src/m45-rocksdb/m45_main.cc index 266860c..dd88cec 100644 --- a/src/m45-rocksdb/m45_main.cc +++ b/src/m45-rocksdb/m45_main.cc @@ -48,7 +48,7 @@ static std::string genrate_random_string(const int len) { static int fill_up_map(std::map &testmap, int entries, int ksize, int vsize){ int count = 0; - while(testmap.size() != entries) { + while(testmap.size() != static_cast(entries)) { // the problem is that with small key sizes, we might run out of unique keys to insert, hence // we append the count at the end to make them unique and then dynamically adjust the value size to // control the total bytes of data inserted in the database @@ -83,10 +83,10 @@ static void destroy_myrocks_context(struct MyRocksContext *&ctx){ delete[] ctx; } -static void print_myrocks_context(struct MyRocksContext *ctx){ - assert(ctx != nullptr); - std::cout<<" uri: " << ctx->uri << " fs_attached: " << ctx->db->GetFileSystem()->Name() << " \n"; -} +// static void print_myrocks_context(struct MyRocksContext *ctx){ +// assert(ctx != nullptr); +// std::cout<<" uri: " << ctx->uri << " fs_attached: " << ctx->db->GetFileSystem()->Name() << " \n"; +// } // posix takes: posix://.*" // s2fs-rocksdb takes takes: s2fs:.*://.* From 827e23612194c47443bc31cf849248adb42550d4 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 14 Oct 2022 20:56:17 +0000 Subject: [PATCH 064/101] finish m1 --- src/common/nvmeprint.cpp | 38 +++--- src/m1/device.cpp | 22 +-- src/m1/device.h | 4 +- src/m1/m1.cpp | 287 ++++++++++++++++++++++----------------- 4 files changed, 191 insertions(+), 160 deletions(-) diff --git a/src/common/nvmeprint.cpp b/src/common/nvmeprint.cpp index 063cebe..31b6982 100644 --- a/src/common/nvmeprint.cpp +++ b/src/common/nvmeprint.cpp @@ -311,25 +311,25 @@ const char *ss_nvme_status_to_string(__u16 status) { } } -static const char *nvme_feature_lba_type_to_string(__u8 type) { - switch (type) { - case 0: - return "Reserved"; - case 1: - return "Filesystem"; - case 2: - return "RAID"; - case 3: - return "Cache"; - case 4: - return "Page / Swap file"; - default: - if (type >= 0x05 && type <= 0x7f) - return "Reserved"; - else - return "Vendor Specific"; - } -} +// static const char *nvme_feature_lba_type_to_string(__u8 type) { +// switch (type) { +// case 0: +// return "Reserved"; +// case 1: +// return "Filesystem"; +// case 2: +// return "RAID"; +// case 3: +// return "Cache"; +// case 4: +// return "Page / Swap file"; +// default: +// if (type >= 0x05 && type <= 0x7f) +// return "Reserved"; +// else +// return "Vendor Specific"; +// } +// } static void nvme_show_id_ns_nsfeat(__u8 nsfeat) { __u8 rsvd = (nsfeat & 0xE0) >> 5; diff --git a/src/m1/device.cpp b/src/m1/device.cpp index 8369aa8..cd74979 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -22,6 +22,7 @@ SOFTWARE. #include #include +#include #include #include #include @@ -217,14 +218,10 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztes nvme_zns_desc *_ztest = nullptr; for (uint64_t i = 0; i < num_zones; ++i) { // see figure 39 for description of these fields - std::cout << "\t SLBA: 0x%-8" << le64_to_cpu(desc->zslba) - << " WP: 0x%-8" << le64_to_cpu(desc->wp) - << " Cap: 0x%-8" << le64_to_cpu(desc->zcap) - << " State: " << std::setw(12) << std::setfill(' ') - << ss_zone_state_to_string(desc->zs >> 4) - << " Type: " << std::setw(14) << std::setfill(' ') - << ss_zone_type_to_string(desc->zt) - << " Attrs: 0x" << desc->za << std::endl; + printf("\t SLBA: 0x%-8" PRIx64" WP: 0x%-8" PRIx64" Cap: 0x%-8" PRIx64" State: %-12s Type: %-14s Attrs: 0x%-x\n", + le64_to_cpu(desc->zslba), le64_to_cpu(desc->wp), + le64_to_cpu(desc->zcap), ss_zone_state_to_string(desc->zs >> 4), + ss_zone_type_to_string(desc->zt), desc->za); // pick the first zone which is empty to do I/O experiments if (!_ztest && desc->zs >> 4 == NVME_ZNS_ZS_EMPTY) _ztest = desc; @@ -246,13 +243,13 @@ perhaps reset the zones with: sudo nvme zns reset-zone -a /dev/nvme0n1" int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, unsigned long long slba, - void *buffer, unsigned buf_size, + void *buffer, uint64_t buf_size, const uint32_t &lba_size, const uint32_t &mdts_size, const bool &read) { //FIXME: while (buf_size) { - unsigned size = buf_size < mdts_size ? buf_size : mdts_size; + unsigned size = buf_size < (mdts_size - 2U) * lba_size ? buf_size : (mdts_size - 2U) * lba_size; unsigned short no_blocks = size / lba_size; if (read) ss_nvme_device_read(fd, nsid, slba, no_blocks, buffer, size); @@ -275,7 +272,6 @@ int ss_nvme_device_read(const int &fd, const unsigned &nsid, //FIXME: nvme_read(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, buf_size, buffer, 0U, nullptr); - ss_nvme_show_status(errno); return errno; } @@ -287,7 +283,6 @@ int ss_nvme_device_write(const int &fd, const unsigned &nsid, //FIXME: nvme_write(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, 0U, buf_size, buffer, 0U, nullptr); - ss_nvme_show_status(errno); return errno; } @@ -296,7 +291,6 @@ int ss_zns_device_zone_reset(const int &fd, const unsigned &nsid, { //FIXME: nvme_zns_mgmt_send(fd, nsid, slba, true, NVME_ZNS_ZSA_RESET, 0U, nullptr); - ss_nvme_show_status(errno); return errno; } @@ -310,13 +304,11 @@ int ss_zns_device_zone_append(const int &fd, const unsigned &nsid, //FIXME: nvme_zns_append(fd, nsid, zslba, numbers - 1, 0U, 0U, 0U, 0U, buf_size, buffer, 0U, nullptr, written_slba); - ss_nvme_show_status(errno); return errno; } void update_lba(unsigned long long &write_lba, const int &count) { - //assert(false); write_lba += count; } diff --git a/src/m1/device.h b/src/m1/device.h index f2cd9a9..4b97666 100644 --- a/src/m1/device.h +++ b/src/m1/device.h @@ -24,8 +24,6 @@ SOFTWARE. #ifndef STOSYS_PROJECT_DEVICE_H #define STOSYS_PROJECT_DEVICE_H -#include - extern "C" { // we will use an ss_ extension to differentiate our struct definitions from the standard library // In C++ we should use namespaces, but I am lazy @@ -48,7 +46,7 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, // these follow nvme specification I added ss_ prefix to avoid namespace collision with other lbnvme functions int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, unsigned long long slba, - void *buffer, unsigned buf_size, + void *buffer, uint64_t buf_size, const uint32_t &lba_size, const uint32_t &mdts_size, const bool &read); int ss_nvme_device_read(const int &fd, const unsigned &nsid, diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index a3ee329..6997221 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -22,9 +22,9 @@ SOFTWARE. #include #include -#include #include #include +#include #include #include #include "device.h" @@ -33,45 +33,48 @@ SOFTWARE. extern "C" { -static int test1_lba_io_test(const int &zfd, const unsigned &nsid, zone_to_test *ztest) +static int test1_lba_io_test(const int &zfd, const unsigned &nsid, + const zone_to_test &ztest) { - struct nvme_id_ns *s_nsid = nullptr; - int ret; - uint64_t test_lba_address = le64_to_cpu(ztest->desc.zslba); - - ret = nvme_identify_ns(zfd, nsid, s_nsid); - if(ret != 0){ - printf("Failed to identify the controller \n"); + nvme_id_ns *s_nsid = nullptr; + uint64_t test_lba_address = le64_to_cpu(ztest.desc.zslba); + int ret = nvme_identify_ns(zfd, nsid, s_nsid); + if (ret) { + std::cout << "Failed to identify the controller" << std::endl; return -1; } - // we know the Zone SIZE and CAPACITY, see https://zonedstorage.io/introduction/zns/ + // we know the Zone SIZE and CAPACITY, + // see https://zonedstorage.io/introduction/zns/ // (the difference between size and capacity) // Step 0: prepare the test pattern buffer - char *w_pattern = (char *) calloc (1, ztest->lba_size_in_use); - char *r_pattern = (char *) calloc (1, ztest->lba_size_in_use); - - assert(w_pattern != nullptr); - assert(r_pattern != nullptr); - - write_pattern(w_pattern, ztest->lba_size_in_use); - // Step 1: this is an empty zone because we choose to pick so, lets write the first LBA - ret = ss_nvme_device_write(zfd, nsid, test_lba_address, 1, w_pattern, ztest->lba_size_in_use); - if(ret != 0){ - printf("ERROR: writing failed on the zone? ret %d \n", ret); + std::unique_ptr w_pattern(new char[ztest.lba_size_in_use]()); + std::unique_ptr r_pattern(new char[ztest.lba_size_in_use]()); + assert(w_pattern); + assert(r_pattern); + write_pattern(w_pattern.get(), ztest.lba_size_in_use); + // Step 1: this is an empty zone because we choose to pick so, + // lets write the first LBA + ret = ss_nvme_device_write(zfd, nsid, test_lba_address, 1U, + w_pattern.get(), ztest.lba_size_in_use); + if (ret) { + std::cout << "ERROR: writing failed on the zone? ret " + << ret << std::endl; goto done; } - printf("OK, success in writing the zone \n"); + std::cout << "OK, success in writing the zone" << std::endl; // step 2: read the pattern, the same logic - ret = ss_nvme_device_read(zfd, nsid, test_lba_address, 1, r_pattern, ztest->lba_size_in_use); - if(ret != 0){ - printf("ERROR: reading failed on the zone? ret %d \n", ret); + ret = ss_nvme_device_read(zfd, nsid, test_lba_address, 1U, + r_pattern.get(), ztest.lba_size_in_use); + if (ret) { + std::cout << "ERROR: reading failed on the zone? ret " + << ret << std::endl; goto done; } - printf("OK, success in reading the zone \n"); - printf("Matching pattern ...\n"); - match_pattern(r_pattern, ztest->lba_size_in_use); - printf("SUCCESS: pattern matched for a simple R/W test \n"); + std::cout << "OK, success in reading the zone" << std::endl; + std::cout << "Matching pattern ..." << std::endl; + match_pattern(r_pattern.get(), ztest.lba_size_in_use); + std::cout << "SUCCESS: pattern matched for a simple R/W test" << std::endl; // starting a looping test with zone reset // this test // step 1: resets a zone @@ -81,174 +84,212 @@ static int test1_lba_io_test(const int &zfd, const unsigned &nsid, zone_to_test // step 5: read all 5 and match the pattern do { // step 1: reset the whole zone - unsigned long long write_lba = le64_to_cpu(ztest->desc.zslba), zone_slba = le64_to_cpu(ztest->desc.zslba); + unsigned long long write_lba = le64_to_cpu(ztest.desc.zslba); + unsigned long long zone_slba = le64_to_cpu(ztest.desc.zslba); unsigned long long returned_slba = -1; ret = ss_zns_device_zone_reset(zfd, nsid, zone_slba); - assert(ret == 0); - printf("zone at 0x%llx is reset successfully \n", zone_slba); + assert(!ret); + std::cout << "zone at 0x" << std::hex << zone_slba + << " is reset successfully" << std::endl; // step 2: write 2x blocks, hence 2x the buffer size - char *w_pattern2 = (char *) calloc (2 , ztest->lba_size_in_use); - // I am writing these patterns in two stages so that I can test them independently. - // nothing smart here, actually more like a dumb idea. But I like dumb working code :) - write_pattern(w_pattern2, ztest->lba_size_in_use); - write_pattern(w_pattern2 + ztest->lba_size_in_use, ztest->lba_size_in_use); - ret = ss_nvme_device_write(zfd, nsid, le64_to_cpu(ztest->desc.zslba), 2, w_pattern2, 2 * ztest->lba_size_in_use); - assert(ret == 0); - printf("zone is written 2x successfully \n"); + std::unique_ptr w_pattern2(new char[2UL * + ztest.lba_size_in_use]()); + // I am writing these patterns in two stages + // so that I can test them independently. + // nothing smart here, actually more like a dumb idea. + // But I like dumb working code :) + write_pattern(w_pattern2.get(), ztest.lba_size_in_use); + write_pattern(w_pattern2.get() + ztest.lba_size_in_use, + ztest.lba_size_in_use); + ret = ss_nvme_device_write(zfd, nsid, le64_to_cpu(ztest.desc.zslba), + 2U, w_pattern2.get(), + 2U * ztest.lba_size_in_use); + assert(!ret); + std::cout << "zone is written 2x successfully" << std::endl; update_lba(write_lba, 2); // step 3: append 2x LBA blocks - ret = ss_zns_device_zone_append(zfd, nsid, zone_slba, 2, w_pattern2, - 2 * ztest->lba_size_in_use, &returned_slba); - assert(ret == 0); - printf("zone is APPENDED 2x successfully, returned pointer is at %llx (to match %llx) \n", returned_slba, write_lba); - // match that the returned pointer - which should be the original write ptr location. - // returned pointer is where the data is appended (not where the write pointer _is_) + ret = ss_zns_device_zone_append(zfd, nsid, zone_slba, 2U, + w_pattern2.get(), + 2U * ztest.lba_size_in_use, + &returned_slba); + assert(!ret); + std::cout << "zone is APPENDED 2x successfully, returned pointer is at " + << std::hex << returned_slba << " (to match " + << std::hex << write_lba << ")" << std::endl; + // match that the returned pointer - + // which should be the original write ptr location. + // returned pointer is where the data is appended + // (not where the write pointer _is_) assert(returned_slba == write_lba); - // move the returned pointer to the +2 LBAs - we can now use the returned pointer + // move the returned pointer to the +2 LBAs - + // we can now use the returned pointer update_lba(returned_slba, 2); // step 4: write the 5th 1x LBA using the returned LBA from the append - ret = ss_nvme_device_write(zfd, nsid, returned_slba, 1, w_pattern, ztest->lba_size_in_use); - assert(ret == 0); - printf("The final write is ok too, we should be at 5x LBAs writes now \n"); + ret = ss_nvme_device_write(zfd, nsid, returned_slba, 1U, + w_pattern.get(), ztest.lba_size_in_use); + assert(!ret); + std::cout << "The final write is ok too, we should be at 5x LBAs \ +writes now" << std::endl; // read all 5 blocks and match their patterns - char *r_pattern2 = (char *) calloc (5, ztest->lba_size_in_use); + std::unique_ptr r_pattern2(new char[5UL * + ztest.lba_size_in_use]()); // read from the start - ret = ss_nvme_device_read(zfd, nsid, zone_slba, 5, r_pattern2, 5 * ztest->lba_size_in_use); - assert(ret == 0); - printf("The final 5x read is ok, matching pattern ... \n"); + ret = ss_nvme_device_read(zfd, nsid, zone_slba, 5U, r_pattern2.get(), + 5U * ztest.lba_size_in_use); + assert(!ret); + std::cout << "The final 5x read is ok, matching pattern ..." + << std::endl; // now test them individually - for(int i = 0 ; i < 5; i++){ - printf("\t testing the %d buffer out of 5...", i); - match_pattern(r_pattern2 + (i * ztest->lba_size_in_use), ztest->lba_size_in_use); - printf(" passed \n"); + for (int i = 0 ; i < 5; ++i) { + std::cout << "\t testing the " << i << " buffer out of 5..."; + match_pattern(r_pattern2.get() + i * ztest.lba_size_in_use, + ztest.lba_size_in_use); + std::cout << " passed" << std::endl; } - free(r_pattern2); - free(w_pattern2); - }while(false); + } while(0); done: - free(w_pattern); - free(r_pattern); - printf("ZNS I/O testing finished, status %d \n", ret); + std::cout << "ZNS I/O testing finished, status " << ret << std::endl; return ret; } -static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) +static int test2_zone0_full_io_test(const int &zfd, const unsigned &nsid, + const zone_to_test &ztest) { - uint64_t zone_size_in_bytes = ztest->lba_size_in_use * ztest->desc.zcap; - uint64_t zslba = le64_to_cpu(ztest->desc.zslba); - uint64_t MDTS = get_mdts_size(zfd); - printf("Test 3: testing the max writing capacity of the device, trying to read and write a complete zone of size %lu bytes \n", - zone_size_in_bytes); - uint8_t *data = (uint8_t *) calloc(1, zone_size_in_bytes); - assert(data != nullptr); - - write_pattern((char*) data, zone_size_in_bytes); + uint64_t zone_size_in_bytes = ztest.lba_size_in_use * ztest.desc.zcap; + unsigned long long zslba = le64_to_cpu(ztest.desc.zslba); + uint32_t MDTS = get_mdts_size(zfd); + std::cout << "Test 3: testing the max writing capacity of the device, \ +trying to read and write a complete zone of size " + << zone_size_in_bytes << " bytes" << std::endl; + std::unique_ptr data(new char[zone_size_in_bytes]()); + assert(data); + write_pattern(data.get(), zone_size_in_bytes); // now reset, and then write the full zone - printf("\t trying to reset the zone at 0x%lx \n", zslba); + std::cout << "\t trying to reset the zone at 0x" + << std::hex << zslba << std::endl; int ret = ss_zns_device_zone_reset(zfd, nsid, zslba); - if(ret != 0){ - printf("Error: zone rest on 0x%lx failed, ret %d \n", zslba, ret); + if (ret) { + std::cout << "Error: zone rest on 0x" + << std::hex << zslba << " failed, ret " << ret << std::endl; goto done; } - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data, zone_size_in_bytes, - ztest->lba_size_in_use, - MDTS, - false); - if(ret != 0){ - printf("Error: zone writing on 0x%lx failed, ret %d \n", zslba, ret); + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data.get(), + zone_size_in_bytes, ztest.lba_size_in_use, + MDTS, false); + if (ret) { + std::cout << "Error: zone writing on 0x" + << std::hex << zslba << " failed, ret " << ret << std::endl; goto done; } // now read the zone - bzero(data, zone_size_in_bytes); - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data, zone_size_in_bytes, - ztest->lba_size_in_use, - MDTS, - true); - if(ret != 0){ - printf("Error: zone reading on 0x%lx failed, ret %d \n", zslba, ret); + bzero(data.get(), zone_size_in_bytes); + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data.get(), + zone_size_in_bytes, ztest.lba_size_in_use, + MDTS, true); + if (ret) { + std::cout << "Error: zone reading on 0x" + << std::hex << zslba << " failed, ret " << ret << std::endl; goto done; } - printf("\t the whole zone reading done \n"); - match_pattern((char*) data, zone_size_in_bytes); - printf("OK: the whole zone pattern matched \n"); - + std::cout << "\t the whole zone reading done" << std::endl; + match_pattern(data.get(), zone_size_in_bytes); + std::cout << "OK: the whole zone pattern matched" << std::endl; done: - free(data); return ret; } int main() { - printf("==============================================================\n"); - printf("Welcome to M1. This is lot of ZNS/NVMe exploration\n"); - printf("==============================================================\n"); + std::cout << "=============================================================\ +=" << std::endl; + std::cout << "Welcome to M1. This is lot of ZNS/NVMe exploration" + << std::endl; + std::cout << "=============================================================\ +=" << std::endl; // scan all NVMe devices in the system - just like nvme list command int ret = count_and_show_all_nvme_devices(); if (ret < 0) { - printf("the host device scans failed, %d\n", ret); + std::cout << "the host device scans failed, " << ret << std::endl; return ret; } - // now we are going to allocate scan the returned number of devices to identify a ZNS device + // now we are going to allocate scan the returned number of devices + // to identify a ZNS device int num_devices = ret; - printf("total number of devices in the system is %d\n", num_devices); + std::cout << "total number of devices in the system is " + << num_devices << std::endl; if (!num_devices) { - printf("Error: failed to open any device, zero devices in the system?\n"); + std::cout << "Error: failed to open any device, zero devices in the \ +system?" << std::endl; return -ENODEV; } std::unique_ptr my_devices(new ss_nvme_ns[num_devices]()); ret = scan_and_identify_zns_devices(my_devices.get()); if (ret < 0) { - printf("scanning of the devices failed %d\n", ret); + std::cout << "scanning of the devices failed" << std::endl; return ret; } ss_nvme_ns *zns_device = nullptr; for (int i = 0; i < num_devices; ++i) { - printf("namespace: %s and zns %s\n", my_devices[i].ctrl_name, (my_devices[i].supports_zns ? "YES" : "NO")); + std::cout << "namespace: " << my_devices[i].ctrl_name + << " and zns " << (my_devices[i].supports_zns ? "YES" : "NO") + << std::endl; // with this we will just pick the last ZNS device to work with if (my_devices[i].supports_zns) zns_device = &my_devices[i]; } - printf("Opening the device at %s\n", zns_device->ctrl_name); + std::cout << "Opening the device at " << zns_device->ctrl_name << std::endl; int fd = nvme_open(zns_device->ctrl_name); if (fd < 0) { - printf("device %s opening failed %d errno %d\n", zns_device->ctrl_name, fd, errno); + std::cout << "device " << zns_device->ctrl_name + << " opening failed " << fd + << " errno " << errno << std::endl; return -fd; } - printf("device %s opened successfully %d\n", zns_device->ctrl_name, fd); + std::cout << "device " << zns_device->ctrl_name + << " opened successfully " << fd << std::endl; // now try to retrieve the NVMe namespace details - step 1 get the id unsigned nsid = 0U; ret = nvme_get_nsid(fd, &nsid); if (ret) { - printf("ERROR: failed to retrieve the nsid %d\n", ret); + std::cout << "ERROR: failed to retrieve the nsid " << ret << std::endl; return ret; } - // with the id now we can query the identify namespace - see figure 249, section 5.15.2 in the NVMe specification + // with the id now we can query the identify namespace - + // see figure 249, section 5.15.2 in the NVMe specification nvme_id_ns ns; ret = nvme_identify_ns(fd, nsid, &ns); if (ret) { - printf("ERROR: failed to retrieve the nsid %d\n", ret); + std::cout << "ERROR: failed to retrieve the nsid " << ret << std::endl; return ret; } ss_nvme_show_id_ns(&ns); - printf("number of LBA formats? %d (a zero based value) \n", ns.nlbaf); - // extract the in-use LBA size, it could be the case that the device supports multiple LBA size + std::cout << "number of LBA formats? " << ns.nlbaf + << " (a zero based value)" << std::endl; + // extract the in-use LBA size, + // it could be the case that the device supports multiple LBA size zone_to_test ztest; - ztest.lba_size_in_use = 1 << ns.lbaf[(ns.flbas & 0xf)].ds; - printf("the LBA size is %u bytes \n", ztest.lba_size_in_use); - // this function shows the zone status and then return the first empty zone to do experiments on in ztest + ztest.lba_size_in_use = 1U << ns.lbaf[(ns.flbas & 0xf)].ds; + std::cout << "the LBA size is " << ztest.lba_size_in_use + << " bytes" << std::endl; + // this function shows the zone status + // and then return the first empty zone to do experiments on in ztest ret = show_zns_zone_status(fd, nsid, ztest); if (ret) { - printf("failed to get a workable zone, ret %d \n", ret); + std::cout << "failed to get a workable zone, ret " << ret << std::endl; return ret; } - int t1 = test1_lba_io_test(fd, nsid, &ztest); - int t2 = test2_zone0_full_io_test(fd, nsid, &ztest); - printf("====================================================================\n"); - printf("Milestone 1 results \n"); - printf("Test 1 (read, write, append, reset) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("Test 2 (Large zone read, write) : %s \n", (t2 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); + int t1 = test1_lba_io_test(fd, nsid, ztest); + int t2 = test2_zone0_full_io_test(fd, nsid, ztest); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 1 results" << std::endl; + std::cout << "Test 1 (read, write, append, reset) : " + << (t1 == 0 ? " Passed" : " Failed") << std::endl; + std::cout << "Test 2 (Large zone read, write) : " + << (t2 == 0 ? " Passed" : " Failed") << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; for(int i = 0; i < num_devices; ++i) free(my_devices[i].ctrl_name); return 0; From 63a27b7f8e3ef3f32905f89facdfd197ee9c097f Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 14 Oct 2022 21:04:13 +0000 Subject: [PATCH 065/101] complete m1 --- src/m1/device.cpp | 56 ++++++++++++++++++++++++++++++----------------- src/m1/device.h | 6 +++-- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/m1/device.cpp b/src/m1/device.cpp index cd74979..438a34e 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -33,7 +33,8 @@ SOFTWARE. #include "device.h" #include "../common/nvmeprint.h" -// Examples lifted from, https://github.com/linux-nvme/libnvme/blob/667334ff8c53dbbefa51948bbe2e086624bf4d0d/test/cpp.cc +// Examples lifted from, +// https://github.com/linux-nvme/libnvme/blob/667334ff8c53dbbefa51948bbe2e086624bf4d0d/test/cpp.cc int count_and_show_all_nvme_devices() { nvme_host_t h; @@ -128,8 +129,8 @@ int scan_and_identify_zns_devices(ss_nvme_ns *list) &list[ns_counter].nsid); int ret = nvme_ns_identify(nspace, &ns); if (ret) { - std::cout << "ERROR : failed to identify the namespace with " - << ret << " and errno " << errno << std::endl; + std::cout << "ERROR : failed to identify the namespace \ +with " << ret << " and errno " << errno << std::endl; return ret; } ++ns_counter; @@ -141,7 +142,8 @@ int scan_and_identify_zns_devices(ss_nvme_ns *list) return 0; } -int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztest) +int show_zns_zone_status(const int &fd, const unsigned &nsid, + zone_to_test &ztest) { // ZNS specific data structures as specified in the TP 4053 // standard NVMe structures @@ -173,17 +175,25 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztes return ret; } ss_nvme_show_zns_id_ctrl(&s_zns_ctrlid); - // now we send the management related commands - see section 4.3 and 4.4 in TP 4053 - // we are now trying to retrieve the number of zones with other information present in the zone report - // the following function takes arguments that are required to filled the command structure as shown + // now we send the management related commands - + // see section 4.3 and 4.4 in TP 4053 + // we are now trying to retrieve the number of zones + // with other information present in the zone report + // the following function takes arguments + // that are required to filled the command structure as shown // in the figures 33-36 // * SLBA goes into CDW 10 and 11, as shown in Figure 34 - // * zras is Zone Receive Action Specific Features, see figure 36 for details - // * NVME_ZNS_ZRA_REPORT_ZONES and NVME_ZNS_ZRAS_REPORT_ALL are shown in Figure 36 CDW 13 + // * zras is Zone Receive Action Specific Features, + // see figure 36 for details + // * NVME_ZNS_ZRA_REPORT_ZONES and NVME_ZNS_ZRAS_REPORT_ALL + // are shown in Figure 36 CDW 13 - // Pay attention what is being passed in the zns_report pointer and size, I am passing a structure - // _WITHOUT_ its entries[] field initialized because we do not know how many zones does this namespace - // hence we first get the number of zones, and then try again to get the full report + // Pay attention what is being passed in the zns_report pointer and size, + // I am passing a structure + // _WITHOUT_ its entries[] field initialized + // because we do not know how many zones does this namespace + // hence we first get the number of zones, + // and then try again to get the full report nvme_zone_report zns_report; ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, @@ -195,10 +205,13 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztes // see figures 37-38-39 in section 4.4.1 uint64_t num_zones = le64_to_cpu(zns_report.nr_zones); printf("nr_zones:%" PRIu64"\n", num_zones); - // lets get more information about the zones - the total metadata size would be + // lets get more information about the zones - + // the total metadata size would be // see the figure 37 in the ZNS description - // so we allocated an structure with a flat memory and point the zone_reports to it - // An alternate strategy would have been just allocate a 4kB page and get some numbers of zone reports whatever can + // so we allocated an structure with a flat memory + // and point the zone_reports to it + // An alternate strategy would have been just allocate a 4kB page + // and get some numbers of zone reports whatever can // fit in that in a loop. uint64_t total_size = sizeof(zns_report) + num_zones * sizeof(nvme_zns_desc); @@ -218,10 +231,12 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztes nvme_zns_desc *_ztest = nullptr; for (uint64_t i = 0; i < num_zones; ++i) { // see figure 39 for description of these fields - printf("\t SLBA: 0x%-8" PRIx64" WP: 0x%-8" PRIx64" Cap: 0x%-8" PRIx64" State: %-12s Type: %-14s Attrs: 0x%-x\n", - le64_to_cpu(desc->zslba), le64_to_cpu(desc->wp), - le64_to_cpu(desc->zcap), ss_zone_state_to_string(desc->zs >> 4), - ss_zone_type_to_string(desc->zt), desc->za); + printf("\t SLBA: 0x%-8" PRIx64, le64_to_cpu(desc->zslba)); + printf(" WP: 0x%-8" PRIx64, le64_to_cpu(desc->wp)); + printf(" Cap: 0x%-8" PRIx64, le64_to_cpu(desc->zcap)); + printf(" State: %-12s", ss_zone_state_to_string(desc->zs >> 4)); + printf(" Type: %-14s", ss_zone_type_to_string(desc->zt)); + printf(" Attrs: 0x%-x\n", desc->za); // pick the first zone which is empty to do I/O experiments if (!_ztest && desc->zs >> 4 == NVME_ZNS_ZS_EMPTY) _ztest = desc; @@ -249,7 +264,8 @@ int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, { //FIXME: while (buf_size) { - unsigned size = buf_size < (mdts_size - 2U) * lba_size ? buf_size : (mdts_size - 2U) * lba_size; + unsigned size = buf_size < (mdts_size - 2U) * lba_size ? + buf_size : (mdts_size - 2U) * lba_size; unsigned short no_blocks = size / lba_size; if (read) ss_nvme_device_read(fd, nsid, slba, no_blocks, buffer, size); diff --git a/src/m1/device.h b/src/m1/device.h index 4b97666..7fe1173 100644 --- a/src/m1/device.h +++ b/src/m1/device.h @@ -25,7 +25,8 @@ SOFTWARE. #define STOSYS_PROJECT_DEVICE_H extern "C" { -// we will use an ss_ extension to differentiate our struct definitions from the standard library +// we will use an ss_ extension +// to differentiate our struct definitions from the standard library // In C++ we should use namespaces, but I am lazy struct ss_nvme_ns { char *ctrl_name; @@ -43,7 +44,8 @@ int count_and_show_all_nvme_devices(); int scan_and_identify_zns_devices(ss_nvme_ns *list); int show_zns_zone_status(const int &fd, const unsigned &nsid, zone_to_test &ztest); -// these follow nvme specification I added ss_ prefix to avoid namespace collision with other lbnvme functions +// these follow nvme specification I added ss_ prefix +// to avoid namespace collision with other lbnvme functions int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, unsigned long long slba, void *buffer, uint64_t buf_size, From 89a6d08ff95d35e8f568bc27a0b85600c8926357 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Fri, 14 Oct 2022 21:07:28 +0000 Subject: [PATCH 066/101] final m1 --- src/m1/device.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/m1/device.cpp b/src/m1/device.cpp index 438a34e..bcaa22d 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -21,7 +21,6 @@ SOFTWARE. */ #include -#include #include #include #include @@ -336,11 +335,10 @@ uint32_t get_mdts_size(const int &fd) //Identify MDTS nvme_identify_ctrl(fd, &ctrl); //Identify MPSMIN - void *regs = mmap(nullptr, getpagesize(), PROT_READ, MAP_SHARED, - fd, 0L); + void *regs = mmap(nullptr, getpagesize(), PROT_READ, MAP_SHARED, fd, 0L); uint32_t mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs)); munmap(regs, getpagesize()); - uint32_t size = pow(2.0, mpsmin) * pow(2.0, ctrl.mdts); + uint32_t size = 1U << (mpsmin + ctrl.mdts); return size; } From 449397704874542524705cbdf9cca4ae837e3d3f Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 00:11:33 +0000 Subject: [PATCH 067/101] change to c++ --- src/m1/m1.cpp | 17 ++- src/m23-ftl/m2.cpp | 212 +++++++++++++++----------- src/m23-ftl/m3.cpp | 359 ++++++++++++++++++++++++++------------------- 3 files changed, 343 insertions(+), 245 deletions(-) diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index 6997221..a486ee6 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -89,7 +89,7 @@ static int test1_lba_io_test(const int &zfd, const unsigned &nsid, unsigned long long returned_slba = -1; ret = ss_zns_device_zone_reset(zfd, nsid, zone_slba); assert(!ret); - std::cout << "zone at 0x" << std::hex << zone_slba + std::cout << "zone at 0x" << std::hex << zone_slba << std::dec << " is reset successfully" << std::endl; // step 2: write 2x blocks, hence 2x the buffer size std::unique_ptr w_pattern2(new char[2UL * @@ -114,8 +114,8 @@ static int test1_lba_io_test(const int &zfd, const unsigned &nsid, &returned_slba); assert(!ret); std::cout << "zone is APPENDED 2x successfully, returned pointer is at " - << std::hex << returned_slba << " (to match " - << std::hex << write_lba << ")" << std::endl; + << std::hex << returned_slba << std::dec << " (to match " + << std::hex << write_lba << std::dec << ")" << std::endl; // match that the returned pointer - // which should be the original write ptr location. // returned pointer is where the data is appended @@ -167,11 +167,12 @@ trying to read and write a complete zone of size " write_pattern(data.get(), zone_size_in_bytes); // now reset, and then write the full zone std::cout << "\t trying to reset the zone at 0x" - << std::hex << zslba << std::endl; + << std::hex << zslba << std::dec << std::endl; int ret = ss_zns_device_zone_reset(zfd, nsid, zslba); if (ret) { std::cout << "Error: zone rest on 0x" - << std::hex << zslba << " failed, ret " << ret << std::endl; + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data.get(), @@ -179,7 +180,8 @@ trying to read and write a complete zone of size " MDTS, false); if (ret) { std::cout << "Error: zone writing on 0x" - << std::hex << zslba << " failed, ret " << ret << std::endl; + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } // now read the zone @@ -189,7 +191,8 @@ trying to read and write a complete zone of size " MDTS, true); if (ret) { std::cout << "Error: zone reading on 0x" - << std::hex << zslba << " failed, ret " << ret << std::endl; + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } std::cout << "\t the whole zone reading done" << std::endl; diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index b0f3f4d..bb3c10c 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -20,103 +20,127 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - -#include -#include #include +#include #include #include +#include +#include +#include +#include #include "./zns_device.h" #include "../common/utils.h" extern "C" { -static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test){ - int ret = -1; +static int write_read_random_lbas(user_zns_device *my_dev, void *buf, + uint32_t buf_size, uint64_t max_lbas_to_test) +{ uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; - if(max_lba_entries < max_lbas_to_test){ - printf("Error: not sufficient LBAs available, pass a smaller number \n"); + if (max_lba_entries < max_lbas_to_test) { + std::cout << "Error: not sufficient LBAs available, pass a smaller \ +number" << std::endl; return -1; } - const uint64_t max_lba_to_generate = (max_lba_entries - max_lbas_to_test); + const uint64_t max_lba_to_generate = max_lba_entries - max_lbas_to_test; // lets pick a random start offset - const uint64_t start_lba = (0 + (rand() % (max_lba_to_generate - 0))); - // now starting from "s" lba, we are going to write out max_lbas_to_test LBAs - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + const uint64_t start_lba = 0UL + rand() % (max_lba_to_generate - 0UL); + // now starting from "s" lba, + // we are going to write out max_lbas_to_test LBAs + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - ith iteration - write_pattern_with_start((char*) buf, buf_size, i); - ret = zns_udevice_write(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + write_pattern_with_start(static_cast(buf), buf_size, i); + int ret = zns_udevice_write(my_dev, i * my_dev->lba_size_bytes, + buf, buf_size); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << i * my_dev->lba_size_bytes << std::dec + << " [index " << i - start_lba << "]" << std::endl; return ret; } } - printf("Writing of %lu unique LBAs OK \n", max_lbas_to_test); + std::cout << "Writing of " << max_lbas_to_test + << " unique LBAs OK" << std::endl; // otherwise all writes passed - now we test reading - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - bzero((char*) buf, buf_size); - ret = zns_udevice_read(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + bzero(static_cast(buf), buf_size); + int ret = zns_udevice_read(my_dev, i * my_dev->lba_size_bytes, + buf, buf_size); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << i * my_dev->lba_size_bytes << std::dec + << " [index " << i - start_lba << "]" << std::endl; return ret; } // now we match - for ith pattern - if it fails it asserts - match_pattern_with_start((char*) buf, buf_size, i); + match_pattern_with_start(static_cast(buf), buf_size, i); } - printf("Reading and matching of %lu unique LBAs OK \n", max_lbas_to_test); + std::cout << "Reading and matching of " << max_lbas_to_test + << " unique LBAs OK" << std::endl; return 0; } -static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size){ - write_pattern((char*) buf, buf_size); - uint64_t test_lba = 0; +static int write_read_lba0(user_zns_device *dev, void *buf, uint32_t buf_size) +{ + write_pattern(static_cast(buf), buf_size); + uint64_t test_lba = 0UL; int ret = zns_udevice_write(dev, test_lba, buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx \n", test_lba); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << test_lba << std::dec << std::endl; return ret; } - printf("%u bytes written successfully on lba 0x%lx \n", buf_size, test_lba); + std::cout << buf_size << " bytes written successfully on lba 0x" + << std::hex << test_lba << std::dec << std::endl; // zero it out bzero(buf, buf_size); ret = zns_udevice_read(dev, test_lba, buf, buf_size); - if(ret != 0){ - printf("Error: reading the device failed at address 0x%lx \n", test_lba); + if (ret) { + std::cout << "Error: reading the device failed at address 0x" + << std::hex << test_lba << std::dec << std::endl; return ret; } - printf("%u bytes read successfully on lba 0x%lx \n", buf_size, test_lba); - match_pattern((char*) buf, buf_size); + std::cout << buf_size << " bytes read successfully on lba 0x" + << std::hex << test_lba << std::dec << std::endl; + match_pattern(static_cast(buf), buf_size); return 0; } -static int show_help(){ - printf("Usage: m2 -d device_name -h -r \n"); - printf("-d : /dev/nvmeXpY - in this format with the full path \n"); - printf("-r : resume if the FTL can. \n"); - printf("-l : the number of zones to use for log/metadata (default, minimum = 3). \n"); - printf("-h : shows help, and exits with success. No argument needed\n"); +static int show_help() +{ + std::cout << "Usage: m2 -d device_name -h -r" << std::endl; + std::cout << "-d : /dev/nvmeXpY - in this format with the full path" + << std::endl; + std::cout << "-r : resume if the FTL can." << std::endl; + std::cout << "-l : the number of zones to use for log/metadata \ +(default, minimum = 3)." << std::endl; + std::cout << "-h : shows help, and exits with success. No argument needed" + << std::endl; return 0; } -int main(int argc, char **argv) { - uint64_t start, end; - start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); - int ret, c; - char *zns_device_name = (char*) "nvme0n1", *test_buf = nullptr, *str1 = nullptr; - struct user_zns_device *my_dev = nullptr; - struct zdev_init_params params; - params.force_reset = true; - params.log_zones = 3; - params.gc_wmark = 1; - - uint64_t max_num_lba_to_test = 0; - printf("===================================================================================== \n"); - printf("This is M2. The goal of this milestone is to implement a hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS (no GC) \n"); - printf("===================================================================================== \n"); +int main(int argc, char *argv[]) +{ + uint64_t start = microseconds_since_epoch(); + srand(static_cast(time(nullptr)) * getpid()); + zdev_init_params params = { + .name = nullptr, + .log_zones = 3, + .gc_wmark = 1, + .force_reset = true + }; + uint64_t max_num_lba_to_test = 0UL; + std::cout << "=============================================================\ +========================" << std::endl; + std::cout << "This is M2. The goal of this milestone is to implement a \ +hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS (no GC)" + << std::endl; + std::cout << "=============================================================\ +========================" << std::endl; + int c = 0; + char *zns_device_name = const_cast("nvme0n1"); + char *str1 = nullptr; while ((c = getopt(argc, argv, "l:d:hr")) != -1) { switch (c) { case 'h': @@ -128,14 +152,14 @@ int main(int argc, char **argv) { case 'd': str1 = strdupa(optarg); if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); + std::cout << "Could not parse the arguments for the device " + << optarg << std::endl; exit(EXIT_FAILURE); } - for (int j = 1; ; j++) { + for (int j = 1; ; ++j) { char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { + if (!token) break; - } // if there was a valid parse, just save it zns_device_name = token; } @@ -143,8 +167,10 @@ int main(int argc, char **argv) { break; case 'l': params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + if (params.log_zones < 3) { + std::cout << "you need 3 or more zones for the log area \ +(metadata (think: milestone 5) + log). You passed " + << params.log_zones << std::endl; exit(-1); } break; @@ -154,30 +180,46 @@ int main(int argc, char **argv) { } } params.name = strdup(zns_device_name); - printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s\n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no"); - ret = init_ss_zns_device(¶ms, &my_dev); - assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); - max_num_lba_to_test = (params.log_zones - 1) * (my_dev->tparams.zns_zone_capacity / my_dev->tparams.zns_lba_size); - printf("The amount of new pages to be written would be the number of (zones - 1) / lba_size : %lu \n", max_num_lba_to_test); - printf("Why? we assume one zone will eventually be taken for writing metadata, and the rest will be used for the FTL log \n"); - test_buf = static_cast(calloc(1, my_dev->lba_size_bytes)); - int t1 = write_read_lba0(my_dev, test_buf, my_dev->lba_size_bytes); + std::cout << "parameter settings are: device-name " << params.name + << " log_zones " << params.log_zones + << " gc-watermark " << params.gc_wmark + << " force-reset " << (params.force_reset ? "yes" : "no") + << std::endl; + user_zns_device *my_dev = nullptr; + int ret = init_ss_zns_device(¶ms, &my_dev); + assert (!ret); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); + max_num_lba_to_test = (params.log_zones - 1) * + (my_dev->tparams.zns_zone_capacity / + my_dev->tparams.zns_lba_size); + std::cout << "The amount of new pages to be written would be the number of \ +(zones - 1) / lba_size : " << max_num_lba_to_test << std::endl; + std::cout << "Why? we assume one zone will eventually be taken for writing \ +metadata, and the rest will be used for the FTL log" << std::endl; + std::unique_ptr test_buf(new char[my_dev->lba_size_bytes]()); + int t1 = write_read_lba0(my_dev, test_buf.get(), my_dev->lba_size_bytes); // -1 because we have already written one LBA. - int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, (max_num_lba_to_test - 1)); - free(test_buf); + int t2 = write_read_random_lbas(my_dev, test_buf.get(), + my_dev->lba_size_bytes, + max_num_lba_to_test - 1UL); ret = deinit_ss_zns_device(my_dev); free(params.name); - end = microseconds_since_epoch(); - printf("====================================================================\n"); - printf("Milestone 2 results \n"); - printf("[stosys-result] Test 1 (write, read, and match on LBA0) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); - printf("====================================================================\n"); + uint64_t end = microseconds_since_epoch(); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 2 results" << std::endl; + std::cout << "[stosys-result] Test 1 (write, read, and match on LBA0) : " + << (t1 == 0 ? " Passed" : " Failed") << std::endl; + printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", + max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "[stosys-stats] The elapsed time is " + << (end - start) / 1000UL << " milliseconds" << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; return ret; } + } diff --git a/src/m23-ftl/m3.cpp b/src/m23-ftl/m3.cpp index 8b3aed3..971b648 100644 --- a/src/m23-ftl/m3.cpp +++ b/src/m23-ftl/m3.cpp @@ -20,69 +20,72 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - -#include +#include #include +#include #include #include -#include -#include #include +#include #include +#include +#include #include - #include "zns_device.h" #include "../common/utils.h" - -static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) { +static int get_sequence_as_array(const uint64_t &capacity, uint64_t *&arr, + const bool &shuffle) +{ std::vector myvector; + // set some values: + for (uint64_t i = 0; i < capacity; ++i) + myvector.emplace_back(i); std::random_device rd; std::mt19937 g(rd()); - uint64_t *tmp = nullptr; - // set some values: - for (uint64_t i = 0; i < capacity; i++) { - myvector.push_back(i); - } - if(shuffle) { + if(shuffle) std::shuffle(myvector.begin(), myvector.end(), g); - } - tmp = new uint64_t[capacity]; - for(uint64_t i = 0; i < capacity; i++){ - tmp[i] = myvector[i]; - } - *arr = tmp; + arr = new uint64_t[capacity]; + for (uint64_t i = 0; i < capacity; ++i) + arr[i] = myvector[i]; return 0; } extern "C" { -static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read){ - int ret; +static int _complete_file_io(const int &fd, const uint64_t &offset, + void *buf, const int &sz, const int &is_read) +{ uint64_t written_so_far = 0; - uintptr_t ptr = (uintptr_t) buf; - while (written_so_far < (uint64_t) sz) { - if(is_read == 1) { - ret = pread(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } else { - ret = pwrite(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } - if(ret < 0){ - printf("file writing failed %d \n", ret); + uintptr_t ptr = reinterpret_cast(buf); + while (written_so_far < static_cast(sz)) { + int ret = 0; + if(is_read) + ret = pread(fd, reinterpret_cast(ptr + written_so_far), + sz - written_so_far, offset + written_so_far); + else + ret = pwrite(fd, reinterpret_cast + (ptr + written_so_far), + sz - written_so_far, offset + written_so_far); + if (ret < 0) { + std::cout << "file writing failed " << ret << std::endl; return ret; } //other add and move along - written_so_far+=ret; + written_so_far += ret; } return 0; } -static int write_complete_file(int fd, uint64_t offset, void *buf, int sz){ +static int write_complete_file(const int &fd, const uint64_t &offset, + void *buf, const int &sz) +{ return _complete_file_io(fd, offset, buf, sz, 0); } -static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ +static int read_complete_file(const int &fd, const uint64_t &offset, + void *buf, const int &sz) +{ return _complete_file_io(fd, offset, buf, sz, 1); } @@ -96,135 +99,165 @@ static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ * list_size = size of the address list * max_hammer_io = a random number, for how many times I should randomly do a write on a random LBA */ -static int wr_full_device_verify(struct user_zns_device *dev, - const uint64_t *addr_list, const uint32_t list_size, - const uint32_t max_hammer_io){ - int ret; +static int wr_full_device_verify(const user_zns_device *dev, + const uint64_t *addr_list, + const uint32_t &list_size, + const uint32_t &max_hammer_io) +{ + std::unique_ptr b1(new char[dev->lba_size_bytes]()); + std::unique_ptr b2(new char[dev->lba_size_bytes]()); + assert(b1); + assert(b2); + write_pattern(b1.get(), dev->lba_size_bytes); const char *tmp_file = "./tmp-output-fulld"; - char *b1 = (char*) calloc(1, dev->lba_size_bytes); - char *b2 = (char*) calloc(1, dev->lba_size_bytes); - assert(b1 != nullptr); - assert(b2 != nullptr); - - write_pattern(b1, dev->lba_size_bytes); int fd = open(tmp_file, O_RDWR|O_CREAT, 0666); if (fd < 0) { - printf("Error: opening of the temp file failed, ret %d ", fd); + std::cout << "Error: opening of the temp file failed, ret " << fd; return -1; } // allocate this side file to the full capacity - ret = posix_fallocate(fd, 0, dev->capacity_bytes); - if(ret){ - printf("Error: fallocate failed, ret %d ", ret); + int ret = posix_fallocate(fd, 0, dev->capacity_bytes); + if (ret) { + std::cout << "Error: fallocate failed, ret " << ret; return -1; } - printf("fallocate OK with %s and size 0x%lx \n", tmp_file, dev->capacity_bytes); + std::cout << "fallocate OK with " << tmp_file << "s and size 0x" + << std::hex << dev->capacity_bytes << std::dec << std::endl; // https://stackoverflow.com/questions/29381843/generate-random-number-in-range-min-max const int min = 0; const int max = dev->lba_size_bytes; - //initialize the device, otherwise we may have indexes where there is random garbage in both cases - for(uint32_t i = 0; i < list_size; i++){ - uint64_t woffset = (addr_list[i]) * dev->lba_size_bytes; - //random offset within the page and just write some random stuff = this is to make a unique I/O pattern - b1[(min + (rand() % (max - min)))] = (char) rand(); - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); + //initialize the device, otherwise we may have indexes + // where there is random garbage in both cases + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t woffset = addr_list[i] * dev->lba_size_bytes; + //random offset within the page and just write some random stuff = + // this is to make a unique I/O pattern + b1[(min + rand() % (max - min))] = (char) rand(); + // now we need to write the buffer in parallel to the zns device + // and the file + ret = zns_udevice_write(const_cast(dev), woffset, + b1.get(), dev->lba_size_bytes); + if (ret) { + std::cout << "Error: ZNS device writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } - ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: file writing failed at offset 0x%lx \n", woffset); + ret = write_complete_file(fd, woffset, b1.get(), dev->lba_size_bytes); + if (ret) { + std::cout << "Error: file writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } } - printf("the ZNS user device has been written (ONCE) completely OK\n"); - if(max_hammer_io > 0){ - printf("Hammering some random LBAs %d times \n", max_hammer_io); - for(uint32_t i = 0; i < max_hammer_io; i++){ + std::cout << "the ZNS user device has been written (ONCE) completely OK" + << std::endl; + if (max_hammer_io > 0) { + std::cout << "Hammering some random LBAs " << max_hammer_io << " times" + << std::endl; + for (uint32_t i = 0; i < max_hammer_io; ++i) { // we should not generate offset which is within the list_size - uint64_t woffset = (addr_list[ 0 + (rand() % (list_size - 0))]) * dev->lba_size_bytes; - //random offset within the page and just write some random stuff, like i - b1[(min + (rand() % (max - min)))] = (char) rand(); - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); + uint64_t woffset = addr_list[0 + rand() % (list_size - 0)] * + dev->lba_size_bytes; + //random offset within the page and just write some random stuff, + // like i + b1[(min + rand() % (max - min))] = static_cast(rand()); + // now we need to write the buffer in parallel to the zns device, + // and the file + ret = zns_udevice_write(const_cast(dev), woffset, + b1.get(), dev->lba_size_bytes); + if (ret) { + std::cout << "Error: ZNS device writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } - ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: file writing failed at offset 0x%lx \n", woffset); + ret = write_complete_file(fd, woffset, + b1.get(), dev->lba_size_bytes); + if (ret) { + std::cout << "Error: file writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } } - printf("Hammering done, OK for %d times \n", max_hammer_io); + std::cout << "Hammering done, OK for " << max_hammer_io << " times" + << std::endl; } - printf("verifying the content of the ZNS device ....\n"); + std::cout << "verifying the content of the ZNS device ...." << std::endl; // reset the buffers - write_pattern(b1, dev->lba_size_bytes); - write_pattern(b2, dev->lba_size_bytes); + write_pattern(b1.get(), dev->lba_size_bytes); + write_pattern(b2.get(), dev->lba_size_bytes); // and now read the whole device and compare the content with the file - for(uint32_t i = 0; i < list_size; i++){ - uint64_t roffset = (addr_list[i]) * dev->lba_size_bytes; - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_read(dev, roffset, b1, dev->lba_size_bytes); - assert(ret == 0); - ret = read_complete_file(fd, roffset, b2, dev->lba_size_bytes); - assert(ret == 0); + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t roffset = addr_list[i] * dev->lba_size_bytes; + // now we need to write the buffer in parallel to the zns device, + // and the file + ret = zns_udevice_read(const_cast(dev), roffset, + b1.get(), dev->lba_size_bytes); + assert(!ret); + ret = read_complete_file(fd, roffset, b2.get(), dev->lba_size_bytes); + assert(!ret); //now both of these should match - for(uint32_t j = 0; j < dev->lba_size_bytes; j++) - if(b1[j] != b2[j]){ - printf("ERROR: buffer mismatch at i %d and j %d , address is 0%lx expecting %x found %x \n", - i, j, roffset, b2[j], b1[j]); + for(uint32_t j = 0; j < dev->lba_size_bytes; ++j) + if (b1[j] != b2[j]) { + std::cout << "ERROR: buffer mismatch at i " << i + << " and j " << j << " , address is 0" + << std::hex << roffset << " expecting " << b2[j] + << " found " << b1[j] << std::dec << std::endl; ret = -EINVAL; goto done; } } - printf("Verification passed on the while device \n"); - + std::cout << "Verification passed on the while device" << std::endl; done: - free(b1); - free(b2); close(fd); ret = remove(tmp_file); - if(ret != 0){ - printf("Error: file deleting failed with ret %d \n", ret); + if (ret) { + std::cout << "Error: file deleting failed with ret " << ret + << std::endl; } return ret; } -static int show_help(){ - printf("Usage: m2 -d device_name -h -r \n"); - printf("-d : /dev/nvmeXpY - in this format with the full path \n"); - printf("-r : resume if the FTL can. \n"); - printf("-l : the number of zones to use for log/metadata (default, minimum = 3). \n"); - printf("-w : watermark threshold, the number of free zones when to trigger the gc (default, minimum = 1). \n"); - printf("-o : overwrite so [int] times (default, 10,000). \n"); - printf("-h : shows help, and exits with success. No argument needed\n"); +static int show_help() +{ + std::cout << "Usage: m2 -d device_name -h -r" << std::endl; + std::cout << "-d : /dev/nvmeXpY - in this format with the full path" + << std::endl; + std::cout << "-r : resume if the FTL can." << std::endl; + std::cout << "-l : the number of zones to use for log/metadata (default, \ +minimum = 3)." << std::endl; + std::cout << "-w : watermark threshold, the number of free zones when to \ +trigger the gc (default, minimum = 1)." << std::endl; + std::cout << "-o : overwrite so [int] times (default, 10,000)." + << std::endl; + std::cout << "-h : shows help, and exits with success. No argument needed" + << std::endl; return 0; } -int main(int argc, char **argv) { - uint64_t start, end; - start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); - int ret, c; - char *zns_device_name = (char*) "nvme0n1", *str1 = nullptr; - struct user_zns_device *my_dev = nullptr; - uint64_t *seq_addresses = nullptr, *random_addresses = nullptr; - uint32_t to_hammer_lba = 10000; - - struct zdev_init_params params; - params.force_reset = true; - params.log_zones = 3; - params.gc_wmark = 1; - - printf("===================================================================================== \n"); - printf("This is M3. The goal of this milestone is to implement a hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS WITH a GC \n"); - printf(" ^^^^^^^^^ \n"); - printf("===================================================================================== \n"); +int main(int argc, char *argv[]) +{ + uint64_t start = microseconds_since_epoch(); + srand(static_cast(time(NULL)) * getpid()); + std::cout << "=============================================================\ +========================" << std::endl; + std::cout << "This is M3. The goal of this milestone is to implement a \ +hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS WITH a GC" + << std::endl; + std::cout << " \ + ^^^^^^^^^" + << std::endl; + std::cout << "=============================================================\ +========================" << std::endl; + int c = 0; + char *zns_device_name = const_cast("nvme0n1"); + char *str1 = nullptr; + uint32_t to_hammer_lba = 10000U; + zdev_init_params params = { + .name = nullptr, + .log_zones = 3, + .gc_wmark = 1, + .force_reset = true + }; while ((c = getopt(argc, argv, "o:m:l:d:w:hr")) != -1) { switch (c) { case 'h': @@ -239,14 +272,14 @@ int main(int argc, char **argv) { case 'd': str1 = strdupa(optarg); if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); + std::cout << "Could not parse the arguments for the device " + << optarg << std::endl; exit(EXIT_FAILURE); } for (int j = 1; ; j++) { char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { + if (!token) break; - } // if there was a valid parse, just save it zns_device_name = token; } @@ -254,15 +287,18 @@ int main(int argc, char **argv) { break; case 'l': params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + if (params.log_zones < 3) { + std::cout << "you need 3 or more zones for the log area \ +(metadata (think: milestone 5) + log). You passed " << params.log_zones + << std::endl; exit(-1); } break; case 'w': params.gc_wmark = atoi(optarg); - if (params.gc_wmark < 1){ - printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); + if (params.gc_wmark < 1) { + std::cout << "you need 1 or more free zones for continuous \ +working of the FTL. You passed " << params.gc_wmark << std::endl; exit(-1); } break; @@ -272,39 +308,56 @@ int main(int argc, char **argv) { } } params.name = strdup(zns_device_name); - printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s hammer-time %d \n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no", to_hammer_lba); - - ret = init_ss_zns_device(¶ms, &my_dev); - assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); + std::cout << "parameter settings are: device-name " << params.name + << " log_zones " << params.log_zones + << " gc-watermark " << params.gc_wmark + << " force-reset " << (params.force_reset ? "yes" : "no") + << " hammer-time " << to_hammer_lba << std::endl; + user_zns_device *my_dev = nullptr; + int ret = init_ss_zns_device(¶ms, &my_dev); + assert (!ret); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; // get a sequential LBA address list - get_sequence_as_array(max_lba_entries, &seq_addresses, false); + uint64_t *seq_addresses = nullptr; + get_sequence_as_array(max_lba_entries, seq_addresses, false); // get a randomized LBA address list - get_sequence_as_array(max_lba_entries, &random_addresses, true); + uint64_t *random_addresses = nullptr; + get_sequence_as_array(max_lba_entries, random_addresses, true); // now we start the test - printf("device %s is opened and initialized, reported LBA size is %u and capacity %lu , max total LBA %u to_hammer %u \n", - params.name, my_dev->lba_size_bytes, my_dev->capacity_bytes, max_lba_entries, to_hammer_lba); - int t1 = wr_full_device_verify(my_dev, seq_addresses, max_lba_entries, 0); - int t2 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, 0); + std::cout << "device " << params.name + << " is opened and initialized, reported LBA size is " + << my_dev->lba_size_bytes + << " and capacity " << my_dev->capacity_bytes + << " , max total LBA " << max_lba_entries + << " to_hammer " << to_hammer_lba << std::endl; + int t1 = wr_full_device_verify(my_dev, seq_addresses, max_lba_entries, 0U); + int t2 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, 0U); int t3 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, to_hammer_lba); // clean up ret = deinit_ss_zns_device(my_dev); // free all delete[] seq_addresses; delete[] random_addresses; - end = microseconds_since_epoch(); - printf("====================================================================\n"); - printf("Milestone 3 results \n"); - printf("[stosys-result] Test 1 sequential write, read, and match (full device) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 2 randomized write, read, and match (full device) : %s \n", (t2 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 3 randomized write, read, and match (full device, hammer %-6u) : %s \n", to_hammer_lba, (t3 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); - printf("====================================================================\n"); + uint64_t end = microseconds_since_epoch(); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 3 results" << std::endl; + std::cout << "[stosys-result] Test 1 sequential write, read, and match \ +(full device) : " << (!t1 ? " Passed" : " Failed") << std::endl; + std::cout << "[stosys-result] Test 2 randomized write, read, and match \ +(full device) : " << (!t2 ? " Passed" : " Failed") << std::endl; + printf("[stosys-result] Test 3 randomized write, read, and match (full \ +device, hammer %-6u) : %s \n", to_hammer_lba, + (!t3 ? " Passed" : " Failed")); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "[stosys-stats] The elapsed time is " + << (end - start) / 1000UL << " milliseconds" << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; return ret; } -} +} From dee4c813ac26732eae0fc1c8b7fa15313e44f74f Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 08:50:16 +0000 Subject: [PATCH 068/101] clean m1 m2 m3 code --- src/m1/m1.cpp | 1 - src/m23-ftl/m2.cpp | 33 +++++++++++--------- src/m23-ftl/m3.cpp | 76 ++++++++++++++++++++++++---------------------- 3 files changed, 58 insertions(+), 52 deletions(-) diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index a486ee6..b816e6b 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -22,7 +22,6 @@ SOFTWARE. #include #include -#include #include #include #include diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index bb3c10c..fa0947d 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -22,7 +22,6 @@ SOFTWARE. #include #include -#include #include #include #include @@ -33,10 +32,11 @@ SOFTWARE. extern "C" { -static int write_read_random_lbas(user_zns_device *my_dev, void *buf, - uint32_t buf_size, uint64_t max_lbas_to_test) +static int write_read_random_lbas(const user_zns_device &my_dev, void *buf, + const uint32_t &buf_size, + const uint64_t &max_lbas_to_test) { - uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; + uint32_t max_lba_entries = my_dev.capacity_bytes / my_dev.lba_size_bytes; if (max_lba_entries < max_lbas_to_test) { std::cout << "Error: not sufficient LBAs available, pass a smaller \ number" << std::endl; @@ -50,11 +50,11 @@ number" << std::endl; for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - ith iteration write_pattern_with_start(static_cast(buf), buf_size, i); - int ret = zns_udevice_write(my_dev, i * my_dev->lba_size_bytes, - buf, buf_size); + int ret = zns_udevice_write(const_cast(&my_dev), + i * my_dev.lba_size_bytes, buf, buf_size); if (ret) { std::cout << "Error: writing the device failed at address 0x" - << std::hex << i * my_dev->lba_size_bytes << std::dec + << std::hex << i * my_dev.lba_size_bytes << std::dec << " [index " << i - start_lba << "]" << std::endl; return ret; } @@ -65,11 +65,11 @@ number" << std::endl; for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write bzero(static_cast(buf), buf_size); - int ret = zns_udevice_read(my_dev, i * my_dev->lba_size_bytes, - buf, buf_size); + int ret = zns_udevice_read(const_cast(&my_dev), + i * my_dev.lba_size_bytes, buf, buf_size); if (ret) { std::cout << "Error: writing the device failed at address 0x" - << std::hex << i * my_dev->lba_size_bytes << std::dec + << std::hex << i * my_dev.lba_size_bytes << std::dec << " [index " << i - start_lba << "]" << std::endl; return ret; } @@ -81,11 +81,13 @@ number" << std::endl; return 0; } -static int write_read_lba0(user_zns_device *dev, void *buf, uint32_t buf_size) +static int write_read_lba0(const user_zns_device &dev, + void *buf, const uint32_t &buf_size) { write_pattern(static_cast(buf), buf_size); uint64_t test_lba = 0UL; - int ret = zns_udevice_write(dev, test_lba, buf, buf_size); + int ret = zns_udevice_write(const_cast(&dev), test_lba, + buf, buf_size); if (ret) { std::cout << "Error: writing the device failed at address 0x" << std::hex << test_lba << std::dec << std::endl; @@ -95,7 +97,8 @@ static int write_read_lba0(user_zns_device *dev, void *buf, uint32_t buf_size) << std::hex << test_lba << std::dec << std::endl; // zero it out bzero(buf, buf_size); - ret = zns_udevice_read(dev, test_lba, buf, buf_size); + ret = zns_udevice_read(const_cast(&dev), test_lba, + buf, buf_size); if (ret) { std::cout << "Error: reading the device failed at address 0x" << std::hex << test_lba << std::dec << std::endl; @@ -198,9 +201,9 @@ hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS (no GC)" std::cout << "Why? we assume one zone will eventually be taken for writing \ metadata, and the rest will be used for the FTL log" << std::endl; std::unique_ptr test_buf(new char[my_dev->lba_size_bytes]()); - int t1 = write_read_lba0(my_dev, test_buf.get(), my_dev->lba_size_bytes); + int t1 = write_read_lba0(*my_dev, test_buf.get(), my_dev->lba_size_bytes); // -1 because we have already written one LBA. - int t2 = write_read_random_lbas(my_dev, test_buf.get(), + int t2 = write_read_random_lbas(*my_dev, test_buf.get(), my_dev->lba_size_bytes, max_num_lba_to_test - 1UL); ret = deinit_ss_zns_device(my_dev); diff --git a/src/m23-ftl/m3.cpp b/src/m23-ftl/m3.cpp index 971b648..3b1e64a 100644 --- a/src/m23-ftl/m3.cpp +++ b/src/m23-ftl/m3.cpp @@ -23,7 +23,6 @@ SOFTWARE. #include #include #include -#include #include #include #include @@ -54,11 +53,11 @@ static int get_sequence_as_array(const uint64_t &capacity, uint64_t *&arr, extern "C" { static int _complete_file_io(const int &fd, const uint64_t &offset, - void *buf, const int &sz, const int &is_read) + void *buf, const uint32_t &sz, const bool &is_read) { - uint64_t written_so_far = 0; + uint32_t written_so_far = 0; uintptr_t ptr = reinterpret_cast(buf); - while (written_so_far < static_cast(sz)) { + while (written_so_far < sz) { int ret = 0; if(is_read) ret = pread(fd, reinterpret_cast(ptr + written_so_far), @@ -78,37 +77,40 @@ static int _complete_file_io(const int &fd, const uint64_t &offset, } static int write_complete_file(const int &fd, const uint64_t &offset, - void *buf, const int &sz) + void *buf, const uint32_t &sz) { - return _complete_file_io(fd, offset, buf, sz, 0); + return _complete_file_io(fd, offset, buf, sz, false); } static int read_complete_file(const int &fd, const uint64_t &offset, - void *buf, const int &sz) + void *buf, const uint32_t &sz) { - return _complete_file_io(fd, offset, buf, sz, 1); + return _complete_file_io(fd, offset, buf, sz, true); } /* - * Based on if the addr_list was in sequence or randomized - we will do sequential or random I/O + * Based on if the addr_list was in sequence or randomized - + * we will do sequential or random I/O * -- - * So the idea of this test is to write a parallel file on the side which has the same content, and the + * So the idea of this test is to write a parallel file on the side + * which has the same content, and the * ZNS device content should match with this file. * * addr_list = list of LBAs how they should be accessed * list_size = size of the address list - * max_hammer_io = a random number, for how many times I should randomly do a write on a random LBA + * max_hammer_io = a random number, + * for how many times I should randomly do a write on a random LBA */ -static int wr_full_device_verify(const user_zns_device *dev, +static int wr_full_device_verify(const user_zns_device &dev, const uint64_t *addr_list, const uint32_t &list_size, const uint32_t &max_hammer_io) { - std::unique_ptr b1(new char[dev->lba_size_bytes]()); - std::unique_ptr b2(new char[dev->lba_size_bytes]()); + std::unique_ptr b1(new char[dev.lba_size_bytes]()); + std::unique_ptr b2(new char[dev.lba_size_bytes]()); assert(b1); assert(b2); - write_pattern(b1.get(), dev->lba_size_bytes); + write_pattern(b1.get(), dev.lba_size_bytes); const char *tmp_file = "./tmp-output-fulld"; int fd = open(tmp_file, O_RDWR|O_CREAT, 0666); if (fd < 0) { @@ -116,33 +118,33 @@ static int wr_full_device_verify(const user_zns_device *dev, return -1; } // allocate this side file to the full capacity - int ret = posix_fallocate(fd, 0, dev->capacity_bytes); + int ret = posix_fallocate(fd, 0, dev.capacity_bytes); if (ret) { std::cout << "Error: fallocate failed, ret " << ret; return -1; } std::cout << "fallocate OK with " << tmp_file << "s and size 0x" - << std::hex << dev->capacity_bytes << std::dec << std::endl; + << std::hex << dev.capacity_bytes << std::dec << std::endl; // https://stackoverflow.com/questions/29381843/generate-random-number-in-range-min-max const int min = 0; - const int max = dev->lba_size_bytes; + const int max = dev.lba_size_bytes; //initialize the device, otherwise we may have indexes // where there is random garbage in both cases for (uint32_t i = 0; i < list_size; ++i) { - uint64_t woffset = addr_list[i] * dev->lba_size_bytes; + uint64_t woffset = addr_list[i] * dev.lba_size_bytes; //random offset within the page and just write some random stuff = // this is to make a unique I/O pattern b1[(min + rand() % (max - min))] = (char) rand(); // now we need to write the buffer in parallel to the zns device // and the file - ret = zns_udevice_write(const_cast(dev), woffset, - b1.get(), dev->lba_size_bytes); + ret = zns_udevice_write(const_cast(&dev), woffset, + b1.get(), dev.lba_size_bytes); if (ret) { std::cout << "Error: ZNS device writing failed at offset 0x" << std::hex << woffset << std::dec << std::endl; goto done; } - ret = write_complete_file(fd, woffset, b1.get(), dev->lba_size_bytes); + ret = write_complete_file(fd, woffset, b1.get(), dev.lba_size_bytes); if (ret) { std::cout << "Error: file writing failed at offset 0x" << std::hex << woffset << std::dec << std::endl; @@ -157,21 +159,21 @@ static int wr_full_device_verify(const user_zns_device *dev, for (uint32_t i = 0; i < max_hammer_io; ++i) { // we should not generate offset which is within the list_size uint64_t woffset = addr_list[0 + rand() % (list_size - 0)] * - dev->lba_size_bytes; + dev.lba_size_bytes; //random offset within the page and just write some random stuff, // like i b1[(min + rand() % (max - min))] = static_cast(rand()); // now we need to write the buffer in parallel to the zns device, // and the file - ret = zns_udevice_write(const_cast(dev), woffset, - b1.get(), dev->lba_size_bytes); + ret = zns_udevice_write(const_cast(&dev), + woffset, b1.get(), dev.lba_size_bytes); if (ret) { std::cout << "Error: ZNS device writing failed at offset 0x" << std::hex << woffset << std::dec << std::endl; goto done; } ret = write_complete_file(fd, woffset, - b1.get(), dev->lba_size_bytes); + b1.get(), dev.lba_size_bytes); if (ret) { std::cout << "Error: file writing failed at offset 0x" << std::hex << woffset << std::dec << std::endl; @@ -183,20 +185,20 @@ static int wr_full_device_verify(const user_zns_device *dev, } std::cout << "verifying the content of the ZNS device ...." << std::endl; // reset the buffers - write_pattern(b1.get(), dev->lba_size_bytes); - write_pattern(b2.get(), dev->lba_size_bytes); + write_pattern(b1.get(), dev.lba_size_bytes); + write_pattern(b2.get(), dev.lba_size_bytes); // and now read the whole device and compare the content with the file for (uint32_t i = 0; i < list_size; ++i) { - uint64_t roffset = addr_list[i] * dev->lba_size_bytes; + uint64_t roffset = addr_list[i] * dev.lba_size_bytes; // now we need to write the buffer in parallel to the zns device, // and the file - ret = zns_udevice_read(const_cast(dev), roffset, - b1.get(), dev->lba_size_bytes); + ret = zns_udevice_read(const_cast(&dev), roffset, + b1.get(), dev.lba_size_bytes); assert(!ret); - ret = read_complete_file(fd, roffset, b2.get(), dev->lba_size_bytes); + ret = read_complete_file(fd, roffset, b2.get(), dev.lba_size_bytes); assert(!ret); //now both of these should match - for(uint32_t j = 0; j < dev->lba_size_bytes; ++j) + for(uint32_t j = 0; j < dev.lba_size_bytes; ++j) if (b1[j] != b2[j]) { std::cout << "ERROR: buffer mismatch at i " << i << " and j " << j << " , address is 0" @@ -332,9 +334,11 @@ working of the FTL. You passed " << params.gc_wmark << std::endl; << " and capacity " << my_dev->capacity_bytes << " , max total LBA " << max_lba_entries << " to_hammer " << to_hammer_lba << std::endl; - int t1 = wr_full_device_verify(my_dev, seq_addresses, max_lba_entries, 0U); - int t2 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, 0U); - int t3 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, to_hammer_lba); + int t1 = wr_full_device_verify(*my_dev, seq_addresses, max_lba_entries, 0U); + int t2 = wr_full_device_verify(*my_dev, random_addresses, max_lba_entries, + 0U); + int t3 = wr_full_device_verify(*my_dev, random_addresses, max_lba_entries, + to_hammer_lba); // clean up ret = deinit_ss_zns_device(my_dev); // free all From 34b95474c90d128451b3dca917a8ee6e06d3f751 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 08:52:39 +0000 Subject: [PATCH 069/101] slight change --- src/m1/m1.cpp | 4 ++-- src/m23-ftl/m2.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index b816e6b..a87a22f 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -287,9 +287,9 @@ system?" << std::endl; =======" << std::endl; std::cout << "Milestone 1 results" << std::endl; std::cout << "Test 1 (read, write, append, reset) : " - << (t1 == 0 ? " Passed" : " Failed") << std::endl; + << (!t1 ? " Passed" : " Failed") << std::endl; std::cout << "Test 2 (Large zone read, write) : " - << (t2 == 0 ? " Passed" : " Failed") << std::endl; + << (!t2 ? " Passed" : " Failed") << std::endl; std::cout << "=============================================================\ =======" << std::endl; for(int i = 0; i < num_devices; ++i) diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index fa0947d..4d6cf93 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -213,9 +213,9 @@ metadata, and the rest will be used for the FTL log" << std::endl; =======" << std::endl; std::cout << "Milestone 2 results" << std::endl; std::cout << "[stosys-result] Test 1 (write, read, and match on LBA0) : " - << (t1 == 0 ? " Passed" : " Failed") << std::endl; + << (!t1 ? " Passed" : " Failed") << std::endl; printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", - max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); + max_num_lba_to_test, (!t2 ? " Passed" : " Failed")); std::cout << "=============================================================\ =======" << std::endl; std::cout << "[stosys-stats] The elapsed time is " From ccc66de0721ae19ace8c352e34a7f233ec5b864b Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 16:47:06 +0000 Subject: [PATCH 070/101] use standard thread and mutex --- src/m23-ftl/zns_device.cpp | 285 ++++++++++++++++--------------------- 1 file changed, 126 insertions(+), 159 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index fb59f3c..18475f8 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -23,15 +23,15 @@ SOFTWARE. #include #include #include -#include -#include +#include +#include +#include #include -#include -#include -#include #include "zns_device.h" -extern "C" { +using std::move; +using std::mutex; +using std::thread; enum { user_read = 0x1, @@ -47,8 +47,8 @@ struct zone_info { unsigned long long saddr; uint32_t num_valid_pages; uint32_t write_ptr; - pthread_mutex_t num_valid_pages_lock; - pthread_mutex_t write_ptr_lock; + mutex num_valid_pages_lock; + mutex write_ptr_lock; zone_info *next; // linked in free_zones and used_log_zones }; @@ -69,14 +69,14 @@ struct logical_block { zone_info *data_zone; // block mapping for this logical block (data zone) uint8_t *bitmap; //TODO: LOCK the access - pthread_mutex_t lock; + mutex lock; }; struct zns_info { // Values from init parameters int num_log_zones; int gc_wmark; - pthread_t gc_thread; + thread gc_thread; bool run_gc; // Query the nsid for following info int fd; @@ -90,7 +90,7 @@ struct zns_info { uint8_t used_status; uint32_t free_transfer_size; uint32_t free_append_size; - pthread_mutex_t size_limit_lock; + mutex size_limit_lock; // Log zones zone_info *curr_log_zone; int num_used_log_zones; @@ -100,7 +100,7 @@ struct zns_info { uint32_t num_free_zones; zone_info *free_zones; zone_info *free_zones_tail; - pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone + mutex zones_lock; // Lock for changing used_log_zone and free_zone // logical block corresponding to each data zone logical_block *logical_blocks; }; @@ -113,16 +113,16 @@ static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages); static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); -static bool read_bitmap(logical_block *block, +static bool read_bitmap(const uint8_t bitmap[], uint32_t offset, uint32_t num_pages); -static void write_bitmap(logical_block *block, +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages); static void change_log_zone(zns_info *info); static void update_page_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr, uint32_t num_pages); static unsigned request_transfer_size(zns_info *info, uint8_t type); -static void free_transfer_size(zns_info *info, uint8_t type, unsigned size); +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size); static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size, uint8_t type); static int append_to_data_zone(zns_info *info, zone_info *zone, @@ -132,13 +132,15 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, static int read_logical_block(zns_info *info, logical_block *block, void *buffer); static void merge(zns_info *info, logical_block *block); -static void *garbage_collection(void *info_ptr); +static void garbage_collection(zns_info *info); + +extern "C" { int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { - *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); - (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); + *my_dev = new user_zns_device(); + (*my_dev)->_private = new zns_info(); zns_info *info = (zns_info *)(*my_dev)->_private; // set num_log_zones info->num_log_zones = params->log_zones; @@ -202,40 +204,20 @@ int init_ss_zns_device(struct zdev_init_params *params, // set max_data_transfer_size nvme_id_ctrl id0; nvme_identify_ctrl(info->fd, &id0); - void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); - if (errno) { - printf("Failed to mmap\n"); - return errno; - } - info->mdts = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id0.mdts)) - - 2U) * info->page_size; + info->mdts = ((1U << id0.mdts) - 2U) * info->page_size; // set zone_append_size_limit nvme_zns_id_ctrl id1; nvme_zns_identify_ctrl(info->fd, &id1); - info->zasl = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id1.zasl)) - - 2U) * info->page_size; - munmap(regs, getpagesize()); - if (errno) { - printf("Failed to munmap\n"); - return errno; - } + info->zasl = ((1U << id1.zasl) - 2U) * info->page_size; info->free_transfer_size = info->mdts; info->free_append_size = info->zasl; - pthread_mutex_init(&info->size_limit_lock, NULL); - // init zones_lock - pthread_mutex_init(&info->zones_lock, NULL); // set all zone index to free_zones - info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones = new zone_info(); info->free_zones_tail = info->free_zones; - pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); for (uint32_t i = 1U; i < info->num_zones; ++i) { - info->free_zones_tail->next = (zone_info *)calloc(1UL, - sizeof(zone_info)); + info->free_zones_tail->next = new zone_info(); info->free_zones_tail = info->free_zones_tail->next; info->free_zones_tail->saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); } // set num_free_zones info->num_free_zones = info->num_zones; @@ -247,19 +229,16 @@ int init_ss_zns_device(struct zdev_init_params *params, info->curr_log_zone->next = NULL; --info->num_free_zones; // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); + info->logical_blocks = new logical_block[info->num_data_zones](); for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; - info->logical_blocks[i].bitmap = (uint8_t *) - calloc(info->num_data_zones * - info->zone_num_pages >> 3UL, - sizeof(uint8_t)); - pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + info->logical_blocks[i].bitmap = new uint8_t[info->num_data_zones * + info->zone_num_pages >> 3U + ](); } //Start GC info->run_gc = true; - pthread_create(&info->gc_thread, NULL, &garbage_collection, info); + info->gc_thread = move(thread(garbage_collection, info)); return 0; } @@ -276,9 +255,10 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - if (!read_bitmap(block, offset, curr_block_read_size / info->page_size)) + if (!read_bitmap(block->bitmap, offset, + curr_block_read_size / info->page_size)) return -1; - pthread_mutex_lock(&block->lock); + block->lock.lock(); if (block->data_zone) { uint32_t curr_read_size = block->data_zone->write_ptr * info->page_size; @@ -309,8 +289,8 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, - user_read); + (uint8_t *)buffer + buff_offset, + curr_read_size, user_read); start = curr; } prev = curr; @@ -321,17 +301,17 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, + (uint8_t *)buffer + buff_offset, curr_read_size, user_read); } - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); page_addr += curr_block_read_size / info->page_size; - buffer = (char *)buffer + curr_block_read_size; + buffer = (uint8_t *)buffer + curr_block_read_size; size -= curr_block_read_size; } - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); info->used_status &= ~user_read; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); return errno; } @@ -346,7 +326,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; uint32_t curr_append_size = 0U; - pthread_mutex_lock(&block->lock); + block->lock.lock(); // if can write to data zone directly if (!block->old_page_maps && block->data_zone && block->data_zone->write_ptr <= offset) { @@ -354,13 +334,13 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, // append null data until arrive offset uint32_t null_size = (offset - block->data_zone->write_ptr) * info->page_size; - char null_buffer[null_size]; - memset(null_buffer, 0, null_size); + uint8_t *null_buffer = new uint8_t[null_size](); int ret = append_to_data_zone(info, block->data_zone, null_buffer, null_size, user_write); + delete[] null_buffer; if (ret) { - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return ret; } } @@ -371,10 +351,10 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int ret = append_to_data_zone(info, block->data_zone, buffer, curr_append_size, user_write); if (ret) { - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return ret; } - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); } else { curr_append_size = size; if (block->data_zone) { @@ -383,20 +363,20 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (curr_append_size > diff_size) curr_append_size = diff_size; } - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); int ret = append_to_log_zone(info, address / info->page_size, buffer, curr_append_size); if (ret) return ret; } - write_bitmap(block, offset, curr_append_size / info->page_size); + write_bitmap(block->bitmap, offset, curr_append_size / info->page_size); address += curr_append_size; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); info->used_status &= ~user_write; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); return errno; } @@ -405,7 +385,7 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) zns_info *info = (zns_info *)my_dev->_private; // Kill gc info->run_gc = false; - pthread_join(info->gc_thread, NULL); + info->gc_thread.join(); logical_block *blocks = info->logical_blocks; // free hashmap for (uint32_t i = 0U; i < info->num_data_zones; ++i) { @@ -413,67 +393,58 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) while (blocks[i].page_maps) { page_map *tmp = blocks[i].page_maps; blocks[i].page_maps = blocks[i].page_maps->next; - free(tmp); + delete tmp; } if (blocks[i].data_zone) { - pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); - pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); - free(blocks[i].data_zone); + delete blocks[i].data_zone; } - free(blocks[i].bitmap); - pthread_mutex_destroy(&blocks[i].lock); + delete[] blocks[i].bitmap; } - free(blocks); + delete[] blocks; while (info->used_log_zones) { zone_info *tmp = info->used_log_zones; info->used_log_zones = info->used_log_zones->next; - pthread_mutex_destroy(&tmp->num_valid_pages_lock); - pthread_mutex_destroy(&tmp->write_ptr_lock); - free(tmp); + delete tmp; } while (info->free_zones) { zone_info *tmp = info->free_zones; info->free_zones = info->free_zones->next; - pthread_mutex_destroy(&tmp->num_valid_pages_lock); - pthread_mutex_destroy(&tmp->write_ptr_lock); - free(tmp); + delete tmp; } - pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); - pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); - free(info->curr_log_zone); - pthread_mutex_destroy(&info->size_limit_lock); - pthread_mutex_destroy(&info->zones_lock); - free(info); - free(my_dev); + delete info->curr_log_zone; + delete info; + delete my_dev; return 0; } +} + static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->num_valid_pages_lock); + zone->num_valid_pages_lock.lock(); zone->num_valid_pages += num_pages; - pthread_mutex_unlock(&zone->num_valid_pages_lock); + zone->num_valid_pages_lock.unlock(); } static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->num_valid_pages_lock); + zone->num_valid_pages_lock.lock(); zone->num_valid_pages -= num_pages; - pthread_mutex_unlock(&zone->num_valid_pages_lock); + zone->num_valid_pages_lock.unlock(); } static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr_lock.lock(); zone->write_ptr += num_pages; - pthread_mutex_unlock(&zone->write_ptr_lock); + zone->write_ptr_lock.unlock(); } static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages) { - pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr_lock.lock(); zone->write_ptr -= num_pages; - pthread_mutex_unlock(&zone->write_ptr_lock); + zone->write_ptr_lock.unlock(); } static inline uint32_t get_block_index(unsigned long long page_addr, @@ -488,29 +459,29 @@ static inline uint32_t get_data_offset(unsigned long long page_addr, return page_addr % zone_num_pages; } -static bool read_bitmap(logical_block *block, +static bool read_bitmap(const uint8_t bitmap[], uint32_t offset, uint32_t num_pages) { while (num_pages--) { - if (!(block->bitmap[offset >> 3U] & 1U << (offset & 0x7U))) + if (!(bitmap[offset >> 3U] & 1U << (offset & 0x7U))) return false; ++offset; } return true; } -static void write_bitmap(logical_block *block, +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages) { while (num_pages--) { - block->bitmap[offset >> 3U] |= 1U << (offset & 0x7U); + bitmap[offset >> 3U] |= 1U << (offset & 0x7U); ++offset; } } static void change_log_zone(zns_info *info) { - pthread_mutex_lock(&info->zones_lock); + info->zones_lock.lock(); if (info->used_log_zones) info->used_log_zones_tail->next = info->curr_log_zone; else @@ -518,18 +489,18 @@ static void change_log_zone(zns_info *info) info->used_log_zones_tail = info->curr_log_zone; info->curr_log_zone = NULL; ++info->num_used_log_zones; - pthread_mutex_unlock(&info->zones_lock); + info->zones_lock.unlock(); while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; while (!info->curr_log_zone) { - pthread_mutex_lock(&info->zones_lock); + info->zones_lock.lock(); if (info->num_free_zones) { info->curr_log_zone = info->free_zones; info->free_zones = info->free_zones->next; info->curr_log_zone->next = NULL; --info->num_free_zones; } - pthread_mutex_unlock(&info->zones_lock); + info->zones_lock.unlock(); } } @@ -541,14 +512,14 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; //Lock for updating page map - pthread_mutex_lock(&block->lock); + block->lock.lock(); if (!block->page_maps) { - block->page_maps = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps = new page_map(); block->page_maps_tail = block->page_maps; block->page_maps->page_addr = page_addr; block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return; } if (block->page_maps->page_addr == page_addr) { @@ -556,17 +527,17 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, decrease_num_valid_page(block->page_maps->zone, 1U); block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return; } if (block->page_maps->page_addr > page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + page_map *tmp = new page_map(); tmp->next = block->page_maps; block->page_maps = tmp; tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return; } page_map *ptr = block->page_maps; @@ -576,26 +547,26 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, decrease_num_valid_page(ptr->next->zone, 1U); ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return; } else if (ptr->next->page_addr > page_addr) { - page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + page_map *tmp = new page_map(); tmp->next = ptr->next; ptr->next = tmp; tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); return; } ptr = ptr->next; } - ptr->next = (page_map *)calloc(1, sizeof(page_map)); + ptr->next = new page_map(); block->page_maps_tail = ptr->next; ptr->next->page_addr = page_addr; ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); ++page_addr; ++physical_addr; } @@ -607,7 +578,7 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) uint32_t max_transfer_size = info->mdts; for (;;) { if (info->free_transfer_size) { - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); break; } } @@ -619,13 +590,13 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) max_transfer_size = info->free_transfer_size; info->free_transfer_size -= max_transfer_size; info->used_status |= type; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); return max_transfer_size; } else { uint32_t max_transfer_size = info->zasl; for (;;) { if (info->free_transfer_size && info->free_append_size) { - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); break; } } @@ -638,18 +609,18 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) info->free_transfer_size -= max_transfer_size; info->free_append_size -= max_transfer_size; info->used_status |= type; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); return max_transfer_size; } } -static void free_transfer_size(zns_info *info, uint8_t type, unsigned size) +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size) { - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); if (type & sb_write) info->free_append_size += size; info->free_transfer_size += size; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); } static int read_from_zns(zns_info *info, unsigned long long physical_addr, @@ -662,9 +633,9 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, unsigned short num_pages = curr_read_size / info->page_size; nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); - free_transfer_size(info, type, curr_transfer_size); + release_transfer_size(info, type, curr_transfer_size); physical_addr += num_pages; - buffer = (char *)buffer + curr_read_size; + buffer = (uint8_t *)buffer + curr_read_size; size -= curr_read_size; } return errno; @@ -685,10 +656,10 @@ static int append_to_data_zone(zns_info *info, zone_info *zone, nvme_zns_append(info->fd, info->nsid, zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); - free_transfer_size(info, type, curr_transfer_size); + release_transfer_size(info, type, curr_transfer_size); if (errno) return errno; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -717,7 +688,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); - free_transfer_size(info, user_write, curr_transfer_size); + release_transfer_size(info, user_write, curr_transfer_size); if (errno) return errno; increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); @@ -727,7 +698,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, change_log_zone(info); page_addr += num_curr_append_pages; physical_addr += num_curr_append_pages; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -754,7 +725,7 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, + (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); start = curr; } @@ -767,43 +738,42 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, gc_read); + (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); return errno; } static void merge(zns_info *info, logical_block *block) { - pthread_mutex_lock(&block->lock); + block->lock.lock(); block->old_page_maps = block->page_maps; block->page_maps = NULL; - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); uint32_t size = get_data_offset(block->page_maps_tail->page_addr, info->zone_num_pages) + 1U; if (block->data_zone && block->data_zone->write_ptr > size) size = block->data_zone->write_ptr; size *= info->page_size; - char buffer[size]; - memset(buffer, 0, size); + uint8_t *buffer = new uint8_t[size](); read_logical_block(info, block, buffer); - pthread_mutex_lock(&info->size_limit_lock); + info->size_limit_lock.lock(); info->used_status &= ~gc_read; - pthread_mutex_unlock(&info->size_limit_lock); - pthread_mutex_lock(&block->lock); + info->size_limit_lock.unlock(); + block->lock.lock(); // Append old data zone to free zones list if (block->data_zone) { decrease_write_ptr(block->data_zone, block->data_zone->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); - pthread_mutex_lock(&info->zones_lock); + info->zones_lock.lock(); if (info->free_zones) info->free_zones_tail->next = block->data_zone; else info->free_zones = block->data_zone; info->free_zones_tail = block->data_zone; ++info->num_free_zones; - pthread_mutex_unlock(&info->zones_lock); + info->zones_lock.unlock(); } - pthread_mutex_lock(&info->zones_lock); + info->zones_lock.lock(); // Get free zone and nullify the next block->data_zone = info->free_zones; info->free_zones = info->free_zones->next; @@ -811,42 +781,42 @@ static void merge(zns_info *info, logical_block *block) info->free_zones_tail = NULL; block->data_zone->next = NULL; --info->num_free_zones; - pthread_mutex_unlock(&info->zones_lock); + info->zones_lock.unlock(); append_to_data_zone(info, block->data_zone, buffer, size, gc_write); - pthread_mutex_lock(&info->size_limit_lock); + delete[] buffer; + info->size_limit_lock.lock(); info->used_status &= ~gc_write; - pthread_mutex_unlock(&info->size_limit_lock); + info->size_limit_lock.unlock(); while (block->old_page_maps) { page_map *tmp = block->old_page_maps; block->old_page_maps = block->old_page_maps->next; - free(tmp); + delete tmp; } - pthread_mutex_unlock(&block->lock); + block->lock.unlock(); } -static void *garbage_collection(void *info_ptr) +static void garbage_collection(zns_info *info) { - zns_info *info = (zns_info *)info_ptr; uint32_t index = 0U; while (info->run_gc) { while (info->num_log_zones - info->num_used_log_zones > info->gc_wmark) { if (!info->run_gc) - return NULL; + return; } logical_block *block = &info->logical_blocks[index]; while(!block->page_maps) { index = (index + 1U) % info->num_data_zones; block = &info->logical_blocks[index]; if (!info->run_gc) - return NULL; + return; } if (!info->run_gc) - return NULL; + return; // Merge logical block to data zone merge(info, block); if (!info->run_gc) - return NULL; + return; // Check used log zone valid counter // if zero reset and add to free zone list // Remove zone from used_log_zones @@ -860,7 +830,7 @@ static void *garbage_collection(void *info_ptr) decrease_write_ptr(curr, curr->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); - pthread_mutex_lock(&info->zones_lock); + info->zones_lock.lock(); // Remove from used_log_zones free = curr; curr = curr->next; @@ -881,7 +851,7 @@ static void *garbage_collection(void *info_ptr) info->free_zones = free; info->free_zones_tail = free; ++info->num_free_zones; - pthread_mutex_unlock(&info->zones_lock); + info->zones_lock.unlock(); } else { prev = curr; curr = curr->next; @@ -889,7 +859,4 @@ static void *garbage_collection(void *info_ptr) } index = (index + 1U) % info->num_data_zones; } - return NULL; -} - } From 27d3882819f24be74cfc8952e21d2f4476bf5144 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:40:51 +0000 Subject: [PATCH 071/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 249 ++++++++++++++++++++----------------- 1 file changed, 133 insertions(+), 116 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 18475f8..455445c 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -23,15 +23,13 @@ SOFTWARE. #include #include #include -#include -#include -#include +#include +#include #include +#include #include "zns_device.h" -using std::move; -using std::mutex; -using std::thread; +extern "C" { enum { user_read = 0x1, @@ -47,8 +45,8 @@ struct zone_info { unsigned long long saddr; uint32_t num_valid_pages; uint32_t write_ptr; - mutex num_valid_pages_lock; - mutex write_ptr_lock; + pthread_mutex_t num_valid_pages_lock; + pthread_mutex_t write_ptr_lock; zone_info *next; // linked in free_zones and used_log_zones }; @@ -69,14 +67,14 @@ struct logical_block { zone_info *data_zone; // block mapping for this logical block (data zone) uint8_t *bitmap; //TODO: LOCK the access - mutex lock; + pthread_mutex_t lock; }; struct zns_info { // Values from init parameters int num_log_zones; int gc_wmark; - thread gc_thread; + pthread_t gc_thread; bool run_gc; // Query the nsid for following info int fd; @@ -90,7 +88,7 @@ struct zns_info { uint8_t used_status; uint32_t free_transfer_size; uint32_t free_append_size; - mutex size_limit_lock; + pthread_mutex_t size_limit_lock; // Log zones zone_info *curr_log_zone; int num_used_log_zones; @@ -100,7 +98,7 @@ struct zns_info { uint32_t num_free_zones; zone_info *free_zones; zone_info *free_zones_tail; - mutex zones_lock; // Lock for changing used_log_zone and free_zone + pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone // logical block corresponding to each data zone logical_block *logical_blocks; }; @@ -115,8 +113,7 @@ static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); static bool read_bitmap(const uint8_t bitmap[], uint32_t offset, uint32_t num_pages); -static void write_bitmap(uint8_t bitmap[], - uint32_t offset, uint32_t num_pages); +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages); static void change_log_zone(zns_info *info); static void update_page_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr, @@ -132,15 +129,13 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, static int read_logical_block(zns_info *info, logical_block *block, void *buffer); static void merge(zns_info *info, logical_block *block); -static void garbage_collection(zns_info *info); - -extern "C" { +static void *garbage_collection(void *info_ptr); int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { - *my_dev = new user_zns_device(); - (*my_dev)->_private = new zns_info(); + *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); + (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; // set num_log_zones info->num_log_zones = params->log_zones; @@ -211,13 +206,21 @@ int init_ss_zns_device(struct zdev_init_params *params, info->zasl = ((1U << id1.zasl) - 2U) * info->page_size; info->free_transfer_size = info->mdts; info->free_append_size = info->zasl; + pthread_mutex_init(&info->size_limit_lock, NULL); + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); // set all zone index to free_zones - info->free_zones = new zone_info(); + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); for (uint32_t i = 1U; i < info->num_zones; ++i) { - info->free_zones_tail->next = new zone_info(); + info->free_zones_tail->next = (zone_info *)calloc(1UL, + sizeof(zone_info)); info->free_zones_tail = info->free_zones_tail->next; info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); } // set num_free_zones info->num_free_zones = info->num_zones; @@ -229,16 +232,19 @@ int init_ss_zns_device(struct zdev_init_params *params, info->curr_log_zone->next = NULL; --info->num_free_zones; // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = new logical_block[info->num_data_zones](); + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; - info->logical_blocks[i].bitmap = new uint8_t[info->num_data_zones * - info->zone_num_pages >> 3U - ](); + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(info->num_data_zones * + info->zone_num_pages >> 3UL, + sizeof(uint8_t)); + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } //Start GC info->run_gc = true; - info->gc_thread = move(thread(garbage_collection, info)); + pthread_create(&info->gc_thread, NULL, &garbage_collection, info); return 0; } @@ -255,10 +261,9 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - if (!read_bitmap(block->bitmap, offset, - curr_block_read_size / info->page_size)) + if (!read_bitmap(block->bitmap, offset, curr_block_read_size / info->page_size)) return -1; - block->lock.lock(); + pthread_mutex_lock(&block->lock); if (block->data_zone) { uint32_t curr_read_size = block->data_zone->write_ptr * info->page_size; @@ -289,8 +294,8 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (uint8_t *)buffer + buff_offset, - curr_read_size, user_read); + (char *)buffer + buff_offset, curr_read_size, + user_read); start = curr; } prev = curr; @@ -301,17 +306,17 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (uint8_t *)buffer + buff_offset, curr_read_size, + (char *)buffer + buff_offset, curr_read_size, user_read); } - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); page_addr += curr_block_read_size / info->page_size; - buffer = (uint8_t *)buffer + curr_block_read_size; + buffer = (char *)buffer + curr_block_read_size; size -= curr_block_read_size; } - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); info->used_status &= ~user_read; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); return errno; } @@ -326,7 +331,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; uint32_t curr_append_size = 0U; - block->lock.lock(); + pthread_mutex_lock(&block->lock); // if can write to data zone directly if (!block->old_page_maps && block->data_zone && block->data_zone->write_ptr <= offset) { @@ -334,13 +339,13 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, // append null data until arrive offset uint32_t null_size = (offset - block->data_zone->write_ptr) * info->page_size; - uint8_t *null_buffer = new uint8_t[null_size](); + char null_buffer[null_size]; + memset(null_buffer, 0, null_size); int ret = append_to_data_zone(info, block->data_zone, null_buffer, null_size, user_write); - delete[] null_buffer; if (ret) { - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return ret; } } @@ -351,10 +356,10 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, int ret = append_to_data_zone(info, block->data_zone, buffer, curr_append_size, user_write); if (ret) { - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return ret; } - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); } else { curr_append_size = size; if (block->data_zone) { @@ -363,7 +368,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (curr_append_size > diff_size) curr_append_size = diff_size; } - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); int ret = append_to_log_zone(info, address / info->page_size, buffer, curr_append_size); if (ret) @@ -371,12 +376,12 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, } write_bitmap(block->bitmap, offset, curr_append_size / info->page_size); address += curr_append_size; - buffer = (uint8_t *)buffer + curr_append_size; + buffer = (char *)buffer + curr_append_size; size -= curr_append_size; } - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); info->used_status &= ~user_write; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); return errno; } @@ -385,7 +390,7 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) zns_info *info = (zns_info *)my_dev->_private; // Kill gc info->run_gc = false; - info->gc_thread.join(); + pthread_join(info->gc_thread, NULL); logical_block *blocks = info->logical_blocks; // free hashmap for (uint32_t i = 0U; i < info->num_data_zones; ++i) { @@ -393,58 +398,67 @@ int deinit_ss_zns_device(struct user_zns_device *my_dev) while (blocks[i].page_maps) { page_map *tmp = blocks[i].page_maps; blocks[i].page_maps = blocks[i].page_maps->next; - delete tmp; + free(tmp); } if (blocks[i].data_zone) { - delete blocks[i].data_zone; + pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); + pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); + free(blocks[i].data_zone); } - delete[] blocks[i].bitmap; + free(blocks[i].bitmap); + pthread_mutex_destroy(&blocks[i].lock); } - delete[] blocks; + free(blocks); while (info->used_log_zones) { zone_info *tmp = info->used_log_zones; info->used_log_zones = info->used_log_zones->next; - delete tmp; + pthread_mutex_destroy(&tmp->num_valid_pages_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); + free(tmp); } while (info->free_zones) { zone_info *tmp = info->free_zones; info->free_zones = info->free_zones->next; - delete tmp; + pthread_mutex_destroy(&tmp->num_valid_pages_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); + free(tmp); } - delete info->curr_log_zone; - delete info; - delete my_dev; + pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); + pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); + free(info->curr_log_zone); + pthread_mutex_destroy(&info->size_limit_lock); + pthread_mutex_destroy(&info->zones_lock); + free(info); + free(my_dev); return 0; } -} - static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages) { - zone->num_valid_pages_lock.lock(); + pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages += num_pages; - zone->num_valid_pages_lock.unlock(); + pthread_mutex_unlock(&zone->num_valid_pages_lock); } static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages) { - zone->num_valid_pages_lock.lock(); + pthread_mutex_lock(&zone->num_valid_pages_lock); zone->num_valid_pages -= num_pages; - zone->num_valid_pages_lock.unlock(); + pthread_mutex_unlock(&zone->num_valid_pages_lock); } static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages) { - zone->write_ptr_lock.lock(); + pthread_mutex_lock(&zone->write_ptr_lock); zone->write_ptr += num_pages; - zone->write_ptr_lock.unlock(); + pthread_mutex_unlock(&zone->write_ptr_lock); } static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages) { - zone->write_ptr_lock.lock(); + pthread_mutex_lock(&zone->write_ptr_lock); zone->write_ptr -= num_pages; - zone->write_ptr_lock.unlock(); + pthread_mutex_unlock(&zone->write_ptr_lock); } static inline uint32_t get_block_index(unsigned long long page_addr, @@ -470,8 +484,7 @@ static bool read_bitmap(const uint8_t bitmap[], return true; } -static void write_bitmap(uint8_t bitmap[], - uint32_t offset, uint32_t num_pages) +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages) { while (num_pages--) { bitmap[offset >> 3U] |= 1U << (offset & 0x7U); @@ -481,7 +494,7 @@ static void write_bitmap(uint8_t bitmap[], static void change_log_zone(zns_info *info) { - info->zones_lock.lock(); + pthread_mutex_lock(&info->zones_lock); if (info->used_log_zones) info->used_log_zones_tail->next = info->curr_log_zone; else @@ -489,18 +502,18 @@ static void change_log_zone(zns_info *info) info->used_log_zones_tail = info->curr_log_zone; info->curr_log_zone = NULL; ++info->num_used_log_zones; - info->zones_lock.unlock(); + pthread_mutex_unlock(&info->zones_lock); while (info->num_used_log_zones == info->num_log_zones); //Dequeue from free_zone to curr_log_zone; while (!info->curr_log_zone) { - info->zones_lock.lock(); + pthread_mutex_lock(&info->zones_lock); if (info->num_free_zones) { info->curr_log_zone = info->free_zones; info->free_zones = info->free_zones->next; info->curr_log_zone->next = NULL; --info->num_free_zones; } - info->zones_lock.unlock(); + pthread_mutex_unlock(&info->zones_lock); } } @@ -512,14 +525,14 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; //Lock for updating page map - block->lock.lock(); + pthread_mutex_lock(&block->lock); if (!block->page_maps) { - block->page_maps = new page_map(); + block->page_maps = (page_map *)calloc(1, sizeof(page_map)); block->page_maps_tail = block->page_maps; block->page_maps->page_addr = page_addr; block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return; } if (block->page_maps->page_addr == page_addr) { @@ -527,17 +540,17 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, decrease_num_valid_page(block->page_maps->zone, 1U); block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return; } if (block->page_maps->page_addr > page_addr) { - page_map *tmp = new page_map(); + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = block->page_maps; block->page_maps = tmp; tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return; } page_map *ptr = block->page_maps; @@ -547,26 +560,26 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, decrease_num_valid_page(ptr->next->zone, 1U); ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return; } else if (ptr->next->page_addr > page_addr) { - page_map *tmp = new page_map(); + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = ptr->next; ptr->next = tmp; tmp->page_addr = page_addr; tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); return; } ptr = ptr->next; } - ptr->next = new page_map(); + ptr->next = (page_map *)calloc(1, sizeof(page_map)); block->page_maps_tail = ptr->next; ptr->next->page_addr = page_addr; ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); ++page_addr; ++physical_addr; } @@ -578,7 +591,7 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) uint32_t max_transfer_size = info->mdts; for (;;) { if (info->free_transfer_size) { - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); break; } } @@ -590,13 +603,13 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) max_transfer_size = info->free_transfer_size; info->free_transfer_size -= max_transfer_size; info->used_status |= type; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); return max_transfer_size; } else { uint32_t max_transfer_size = info->zasl; for (;;) { if (info->free_transfer_size && info->free_append_size) { - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); break; } } @@ -609,18 +622,18 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) info->free_transfer_size -= max_transfer_size; info->free_append_size -= max_transfer_size; info->used_status |= type; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); return max_transfer_size; } } static void release_transfer_size(zns_info *info, uint8_t type, unsigned size) { - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); if (type & sb_write) info->free_append_size += size; info->free_transfer_size += size; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); } static int read_from_zns(zns_info *info, unsigned long long physical_addr, @@ -635,7 +648,7 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); release_transfer_size(info, type, curr_transfer_size); physical_addr += num_pages; - buffer = (uint8_t *)buffer + curr_read_size; + buffer = (char *)buffer + curr_read_size; size -= curr_read_size; } return errno; @@ -659,7 +672,7 @@ static int append_to_data_zone(zns_info *info, zone_info *zone, release_transfer_size(info, type, curr_transfer_size); if (errno) return errno; - buffer = (uint8_t *)buffer + curr_append_size; + buffer = (char *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -698,7 +711,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, change_log_zone(info); page_addr += num_curr_append_pages; physical_addr += num_curr_append_pages; - buffer = (uint8_t *)buffer + curr_append_size; + buffer = (char *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -725,7 +738,7 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (uint8_t *)buffer + buff_offset, curr_read_size, + (char *)buffer + buff_offset, curr_read_size, gc_read); start = curr; } @@ -738,42 +751,43 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); + (char *)buffer + buff_offset, curr_read_size, gc_read); return errno; } static void merge(zns_info *info, logical_block *block) { - block->lock.lock(); + pthread_mutex_lock(&block->lock); block->old_page_maps = block->page_maps; block->page_maps = NULL; - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); uint32_t size = get_data_offset(block->page_maps_tail->page_addr, info->zone_num_pages) + 1U; if (block->data_zone && block->data_zone->write_ptr > size) size = block->data_zone->write_ptr; size *= info->page_size; - uint8_t *buffer = new uint8_t[size](); + char buffer[size]; + memset(buffer, 0, size); read_logical_block(info, block, buffer); - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); info->used_status &= ~gc_read; - info->size_limit_lock.unlock(); - block->lock.lock(); + pthread_mutex_unlock(&info->size_limit_lock); + pthread_mutex_lock(&block->lock); // Append old data zone to free zones list if (block->data_zone) { decrease_write_ptr(block->data_zone, block->data_zone->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); - info->zones_lock.lock(); + pthread_mutex_lock(&info->zones_lock); if (info->free_zones) info->free_zones_tail->next = block->data_zone; else info->free_zones = block->data_zone; info->free_zones_tail = block->data_zone; ++info->num_free_zones; - info->zones_lock.unlock(); + pthread_mutex_unlock(&info->zones_lock); } - info->zones_lock.lock(); + pthread_mutex_lock(&info->zones_lock); // Get free zone and nullify the next block->data_zone = info->free_zones; info->free_zones = info->free_zones->next; @@ -781,42 +795,42 @@ static void merge(zns_info *info, logical_block *block) info->free_zones_tail = NULL; block->data_zone->next = NULL; --info->num_free_zones; - info->zones_lock.unlock(); + pthread_mutex_unlock(&info->zones_lock); append_to_data_zone(info, block->data_zone, buffer, size, gc_write); - delete[] buffer; - info->size_limit_lock.lock(); + pthread_mutex_lock(&info->size_limit_lock); info->used_status &= ~gc_write; - info->size_limit_lock.unlock(); + pthread_mutex_unlock(&info->size_limit_lock); while (block->old_page_maps) { page_map *tmp = block->old_page_maps; block->old_page_maps = block->old_page_maps->next; - delete tmp; + free(tmp); } - block->lock.unlock(); + pthread_mutex_unlock(&block->lock); } -static void garbage_collection(zns_info *info) +static void *garbage_collection(void *info_ptr) { + zns_info *info = (zns_info *)info_ptr; uint32_t index = 0U; while (info->run_gc) { while (info->num_log_zones - info->num_used_log_zones > info->gc_wmark) { if (!info->run_gc) - return; + return NULL; } logical_block *block = &info->logical_blocks[index]; while(!block->page_maps) { index = (index + 1U) % info->num_data_zones; block = &info->logical_blocks[index]; if (!info->run_gc) - return; + return NULL; } if (!info->run_gc) - return; + return NULL; // Merge logical block to data zone merge(info, block); if (!info->run_gc) - return; + return NULL; // Check used log zone valid counter // if zero reset and add to free zone list // Remove zone from used_log_zones @@ -830,7 +844,7 @@ static void garbage_collection(zns_info *info) decrease_write_ptr(curr, curr->write_ptr); nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); - info->zones_lock.lock(); + pthread_mutex_lock(&info->zones_lock); // Remove from used_log_zones free = curr; curr = curr->next; @@ -851,7 +865,7 @@ static void garbage_collection(zns_info *info) info->free_zones = free; info->free_zones_tail = free; ++info->num_free_zones; - info->zones_lock.unlock(); + pthread_mutex_unlock(&info->zones_lock); } else { prev = curr; curr = curr->next; @@ -859,4 +873,7 @@ static void garbage_collection(zns_info *info) } index = (index + 1U) % info->num_data_zones; } + return NULL; +} + } From 134fe8eadcd8f2906cbb8e39956aed677352f180 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:49:25 +0000 Subject: [PATCH 072/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 455445c..be0b711 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -41,25 +41,25 @@ enum { }; // zone in zns -struct zone_info { +typedef struct zone_info { unsigned long long saddr; uint32_t num_valid_pages; uint32_t write_ptr; pthread_mutex_t num_valid_pages_lock; pthread_mutex_t write_ptr_lock; - zone_info *next; // linked in free_zones and used_log_zones -}; + struct zone_info *next; // linked in free_zones and used_log_zones +} zone_info; // page map for log zones -struct page_map { +typedef struct page_map { unsigned long long page_addr; unsigned long long physical_addr; zone_info *zone; - page_map *next; // page map for each logical block -}; + struct page_map *next; // page map for each logical block +} page_map; // Contains data in log zone (page map) and data in data zone (block map) -struct logical_block { +typedef struct logical_block { unsigned long long s_page_addr; page_map *page_maps; // page mapping for this logical block (log zone) page_map *old_page_maps; @@ -68,9 +68,9 @@ struct logical_block { uint8_t *bitmap; //TODO: LOCK the access pthread_mutex_t lock; -}; +} logical_block; -struct zns_info { +typedef struct zns_info { // Values from init parameters int num_log_zones; int gc_wmark; @@ -101,7 +101,7 @@ struct zns_info { pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone // logical block corresponding to each data zone logical_block *logical_blocks; -}; +} zns_info; static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages); From df57be6624ace7cac350d9817c086fc5d3174699 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:52:11 +0000 Subject: [PATCH 073/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 9 ++++----- src/m23-ftl/zns_device.h | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index be0b711..dd4ec92 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -131,8 +131,7 @@ static int read_logical_block(zns_info *info, logical_block *block, static void merge(zns_info *info, logical_block *block); static void *garbage_collection(void *info_ptr); -int init_ss_zns_device(struct zdev_init_params *params, - struct user_zns_device **my_dev) +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) { *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); @@ -248,7 +247,7 @@ int init_ss_zns_device(struct zdev_init_params *params, return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; @@ -320,7 +319,7 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, return errno; } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; @@ -385,7 +384,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, return errno; } -int deinit_ss_zns_device(struct user_zns_device *my_dev) +int deinit_ss_zns_device(user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; // Kill gc diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index a757cd2..aa8bfdc 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -34,35 +34,35 @@ extern "C" { (type *)( (char *)__mptr - offsetof(type,member) );}) /* after a successful initialization of a device, you must set these ZNS device parameters for testing */ -struct zns_device_testing_params { +typedef struct zns_device_testing_params { // LBA size at the ZNS device uint32_t zns_lba_size; // Zone size at the ZNS device uint32_t zns_zone_capacity; // total number of zones uint32_t zns_num_zones; -}; +} zns_device_testing_params; -struct user_zns_device { +typedef struct user_zns_device { /* these are user visible properties */ uint32_t lba_size_bytes; // the user device LBA size - should be some multiple of the ZNS device page size, you can keep it as it is uint64_t capacity_bytes; // total user device capacity - struct zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions + zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions // your own private data void *_private; //Points to zns_info -}; +} user_zns_device; -struct zdev_init_params { +typedef struct zdev_init_params { char *name; int log_zones; int gc_wmark; bool force_reset; -}; +} zdev_init_params; -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int deinit_ss_zns_device(struct user_zns_device *my_dev); +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev); +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int deinit_ss_zns_device(user_zns_device *my_dev); }; From c10f2d3e9530f58fc58d9f5f4dcd2b9b6d5731bc Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:56:25 +0000 Subject: [PATCH 074/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 75 +++++++++++++++----------------------- src/m23-ftl/zns_device.h | 22 +++++------ 2 files changed, 40 insertions(+), 57 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index fb59f3c..dd4ec92 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -27,8 +27,6 @@ SOFTWARE. #include #include #include -#include -#include #include "zns_device.h" extern "C" { @@ -43,25 +41,25 @@ enum { }; // zone in zns -struct zone_info { +typedef struct zone_info { unsigned long long saddr; uint32_t num_valid_pages; uint32_t write_ptr; pthread_mutex_t num_valid_pages_lock; pthread_mutex_t write_ptr_lock; - zone_info *next; // linked in free_zones and used_log_zones -}; + struct zone_info *next; // linked in free_zones and used_log_zones +} zone_info; // page map for log zones -struct page_map { +typedef struct page_map { unsigned long long page_addr; unsigned long long physical_addr; zone_info *zone; - page_map *next; // page map for each logical block -}; + struct page_map *next; // page map for each logical block +} page_map; // Contains data in log zone (page map) and data in data zone (block map) -struct logical_block { +typedef struct logical_block { unsigned long long s_page_addr; page_map *page_maps; // page mapping for this logical block (log zone) page_map *old_page_maps; @@ -70,9 +68,9 @@ struct logical_block { uint8_t *bitmap; //TODO: LOCK the access pthread_mutex_t lock; -}; +} logical_block; -struct zns_info { +typedef struct zns_info { // Values from init parameters int num_log_zones; int gc_wmark; @@ -103,7 +101,7 @@ struct zns_info { pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone // logical block corresponding to each data zone logical_block *logical_blocks; -}; +} zns_info; static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages); @@ -113,16 +111,15 @@ static inline uint32_t get_block_index(unsigned long long page_addr, uint32_t zone_num_pages); static inline uint32_t get_data_offset(unsigned long long page_addr, uint32_t zone_num_pages); -static bool read_bitmap(logical_block *block, +static bool read_bitmap(const uint8_t bitmap[], uint32_t offset, uint32_t num_pages); -static void write_bitmap(logical_block *block, - uint32_t offset, uint32_t num_pages); +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages); static void change_log_zone(zns_info *info); static void update_page_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr, uint32_t num_pages); static unsigned request_transfer_size(zns_info *info, uint8_t type); -static void free_transfer_size(zns_info *info, uint8_t type, unsigned size); +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size); static int read_from_zns(zns_info *info, unsigned long long physical_addr, void *buffer, uint32_t size, uint8_t type); static int append_to_data_zone(zns_info *info, zone_info *zone, @@ -134,8 +131,7 @@ static int read_logical_block(zns_info *info, logical_block *block, static void merge(zns_info *info, logical_block *block); static void *garbage_collection(void *info_ptr); -int init_ss_zns_device(struct zdev_init_params *params, - struct user_zns_device **my_dev) +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) { *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); @@ -202,23 +198,11 @@ int init_ss_zns_device(struct zdev_init_params *params, // set max_data_transfer_size nvme_id_ctrl id0; nvme_identify_ctrl(info->fd, &id0); - void *regs = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, info->fd, 0L); - if (errno) { - printf("Failed to mmap\n"); - return errno; - } - info->mdts = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id0.mdts)) - - 2U) * info->page_size; + info->mdts = ((1U << id0.mdts) - 2U) * info->page_size; // set zone_append_size_limit nvme_zns_id_ctrl id1; nvme_zns_identify_ctrl(info->fd, &id1); - info->zasl = ((1U << (NVME_CAP_MPSMIN(nvme_mmio_read64(regs)) + id1.zasl)) - - 2U) * info->page_size; - munmap(regs, getpagesize()); - if (errno) { - printf("Failed to munmap\n"); - return errno; - } + info->zasl = ((1U << id1.zasl) - 2U) * info->page_size; info->free_transfer_size = info->mdts; info->free_append_size = info->zasl; pthread_mutex_init(&info->size_limit_lock, NULL); @@ -263,7 +247,7 @@ int init_ss_zns_device(struct zdev_init_params *params, return 0; } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; @@ -276,7 +260,7 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - if (!read_bitmap(block, offset, curr_block_read_size / info->page_size)) + if (!read_bitmap(block->bitmap, offset, curr_block_read_size / info->page_size)) return -1; pthread_mutex_lock(&block->lock); if (block->data_zone) { @@ -335,7 +319,7 @@ int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, return errno; } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size) { zns_info *info = (zns_info *)my_dev->_private; @@ -389,7 +373,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, if (ret) return ret; } - write_bitmap(block, offset, curr_append_size / info->page_size); + write_bitmap(block->bitmap, offset, curr_append_size / info->page_size); address += curr_append_size; buffer = (char *)buffer + curr_append_size; size -= curr_append_size; @@ -400,7 +384,7 @@ int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, return errno; } -int deinit_ss_zns_device(struct user_zns_device *my_dev) +int deinit_ss_zns_device(user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; // Kill gc @@ -488,22 +472,21 @@ static inline uint32_t get_data_offset(unsigned long long page_addr, return page_addr % zone_num_pages; } -static bool read_bitmap(logical_block *block, +static bool read_bitmap(const uint8_t bitmap[], uint32_t offset, uint32_t num_pages) { while (num_pages--) { - if (!(block->bitmap[offset >> 3U] & 1U << (offset & 0x7U))) + if (!(bitmap[offset >> 3U] & 1U << (offset & 0x7U))) return false; ++offset; } return true; } -static void write_bitmap(logical_block *block, - uint32_t offset, uint32_t num_pages) +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages) { while (num_pages--) { - block->bitmap[offset >> 3U] |= 1U << (offset & 0x7U); + bitmap[offset >> 3U] |= 1U << (offset & 0x7U); ++offset; } } @@ -643,7 +626,7 @@ static unsigned request_transfer_size(zns_info *info, uint8_t type) } } -static void free_transfer_size(zns_info *info, uint8_t type, unsigned size) +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size) { pthread_mutex_lock(&info->size_limit_lock); if (type & sb_write) @@ -662,7 +645,7 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, unsigned short num_pages = curr_read_size / info->page_size; nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); - free_transfer_size(info, type, curr_transfer_size); + release_transfer_size(info, type, curr_transfer_size); physical_addr += num_pages; buffer = (char *)buffer + curr_read_size; size -= curr_read_size; @@ -685,7 +668,7 @@ static int append_to_data_zone(zns_info *info, zone_info *zone, nvme_zns_append(info->fd, info->nsid, zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); - free_transfer_size(info, type, curr_transfer_size); + release_transfer_size(info, type, curr_transfer_size); if (errno) return errno; buffer = (char *)buffer + curr_append_size; @@ -717,7 +700,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, num_curr_append_pages - 1, 0U, 0U, 0U, 0U, curr_append_size, buffer, 0U, NULL, &physical_addr); - free_transfer_size(info, user_write, curr_transfer_size); + release_transfer_size(info, user_write, curr_transfer_size); if (errno) return errno; increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index a757cd2..aa8bfdc 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -34,35 +34,35 @@ extern "C" { (type *)( (char *)__mptr - offsetof(type,member) );}) /* after a successful initialization of a device, you must set these ZNS device parameters for testing */ -struct zns_device_testing_params { +typedef struct zns_device_testing_params { // LBA size at the ZNS device uint32_t zns_lba_size; // Zone size at the ZNS device uint32_t zns_zone_capacity; // total number of zones uint32_t zns_num_zones; -}; +} zns_device_testing_params; -struct user_zns_device { +typedef struct user_zns_device { /* these are user visible properties */ uint32_t lba_size_bytes; // the user device LBA size - should be some multiple of the ZNS device page size, you can keep it as it is uint64_t capacity_bytes; // total user device capacity - struct zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions + zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions // your own private data void *_private; //Points to zns_info -}; +} user_zns_device; -struct zdev_init_params { +typedef struct zdev_init_params { char *name; int log_zones; int gc_wmark; bool force_reset; -}; +} zdev_init_params; -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int deinit_ss_zns_device(struct user_zns_device *my_dev); +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev); +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int deinit_ss_zns_device(user_zns_device *my_dev); }; From 24d6947ecfca982b052a4a59d8a2c3d0ffcf654e Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:57:24 +0000 Subject: [PATCH 075/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index dd4ec92..8deb8ec 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -260,7 +260,8 @@ int zns_udevice_read(user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - if (!read_bitmap(block->bitmap, offset, curr_block_read_size / info->page_size)) + if (!read_bitmap(block->bitmap, offset, + curr_block_read_size / info->page_size)) return -1; pthread_mutex_lock(&block->lock); if (block->data_zone) { From 31425ec4f849b2234a079a0a561f63d1ef25b78c Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 17:57:56 +0000 Subject: [PATCH 076/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index dd4ec92..8deb8ec 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -260,7 +260,8 @@ int zns_udevice_read(user_zns_device *my_dev, uint64_t address, info->page_size; if (curr_block_read_size > size) curr_block_read_size = size; - if (!read_bitmap(block->bitmap, offset, curr_block_read_size / info->page_size)) + if (!read_bitmap(block->bitmap, offset, + curr_block_read_size / info->page_size)) return -1; pthread_mutex_lock(&block->lock); if (block->data_zone) { From b790b502f9de5dfc0828f7f22384d48ae47c048d Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 18:30:08 +0000 Subject: [PATCH 077/101] final ftl without restore --- src/m23-ftl/zns_device.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 8deb8ec..0c77d04 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -160,6 +160,8 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) printf("Zone reset failed %d\n", ret); return ret; } + } else { + ; } // set zns_lba_size or page_size : Its same for now! nvme_id_ns ns; @@ -236,9 +238,9 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; info->logical_blocks[i].bitmap = (uint8_t *) - calloc(info->num_data_zones * - info->zone_num_pages >> 3UL, - sizeof(uint8_t)); + calloc((info->num_data_zones * + info->zone_num_pages + 1UL + >> 3UL), sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } //Start GC From dbe59832a6f183e2d8256bdb629415b00a5d9ea2 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 20:31:18 +0000 Subject: [PATCH 078/101] clean all log zone and keep first zone clean --- src/m23-ftl/zns_device.cpp | 93 ++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 33 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 0c77d04..b1ffaeb 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -121,9 +121,9 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, static unsigned request_transfer_size(zns_info *info, uint8_t type); static void release_transfer_size(zns_info *info, uint8_t type, unsigned size); static int read_from_zns(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size, uint8_t type); + void *buffer, uint64_t size, uint8_t type); static int append_to_data_zone(zns_info *info, zone_info *zone, - void *buffer, uint32_t size, uint8_t type); + void *buffer, uint64_t size, uint8_t type); static int append_to_log_zone(zns_info *info, unsigned long long page_addr, void *buffer, uint32_t size); static int read_logical_block(zns_info *info, logical_block *block, @@ -238,8 +238,8 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; info->logical_blocks[i].bitmap = (uint8_t *) - calloc((info->num_data_zones * - info->zone_num_pages + 1UL + calloc(((info->num_data_zones * + info->zone_num_pages + 1UL) >> 3UL), sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } @@ -296,7 +296,7 @@ int zns_udevice_read(user_zns_device *my_dev, uint64_t address, start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, + (uint8_t *)buffer + buff_offset, curr_read_size, user_read); start = curr; } @@ -308,12 +308,12 @@ int zns_udevice_read(user_zns_device *my_dev, uint64_t address, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, + (uint8_t *)buffer + buff_offset, curr_read_size, user_read); } pthread_mutex_unlock(&block->lock); page_addr += curr_block_read_size / info->page_size; - buffer = (char *)buffer + curr_block_read_size; + buffer = (uint8_t *)buffer + curr_block_read_size; size -= curr_block_read_size; } pthread_mutex_lock(&info->size_limit_lock); @@ -341,7 +341,7 @@ int zns_udevice_write(user_zns_device *my_dev, uint64_t address, // append null data until arrive offset uint32_t null_size = (offset - block->data_zone->write_ptr) * info->page_size; - char null_buffer[null_size]; + uint8_t null_buffer[null_size]; memset(null_buffer, 0, null_size); int ret = append_to_data_zone(info, block->data_zone, null_buffer, null_size, @@ -378,7 +378,7 @@ int zns_udevice_write(user_zns_device *my_dev, uint64_t address, } write_bitmap(block->bitmap, offset, curr_append_size / info->page_size); address += curr_append_size; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } pthread_mutex_lock(&info->size_limit_lock); @@ -391,6 +391,15 @@ int deinit_ss_zns_device(user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; // Kill gc + pthread_mutex_lock(&info->zones_lock); + if (info->used_log_zones) + info->used_log_zones_tail->next = info->curr_log_zone; + else + info->used_log_zones = info->curr_log_zone; + info->used_log_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; + ++info->num_used_log_zones; + pthread_mutex_unlock(&info->zones_lock); info->run_gc = false; pthread_join(info->gc_thread, NULL); logical_block *blocks = info->logical_blocks; @@ -425,9 +434,6 @@ int deinit_ss_zns_device(user_zns_device *my_dev) pthread_mutex_destroy(&tmp->write_ptr_lock); free(tmp); } - pthread_mutex_destroy(&info->curr_log_zone->num_valid_pages_lock); - pthread_mutex_destroy(&info->curr_log_zone->write_ptr_lock); - free(info->curr_log_zone); pthread_mutex_destroy(&info->size_limit_lock); pthread_mutex_destroy(&info->zones_lock); free(info); @@ -639,7 +645,7 @@ static void release_transfer_size(zns_info *info, uint8_t type, unsigned size) } static int read_from_zns(zns_info *info, unsigned long long physical_addr, - void *buffer, uint32_t size, uint8_t type) + void *buffer, uint64_t size, uint8_t type) { while (size) { unsigned curr_transfer_size = request_transfer_size(info, type); @@ -650,14 +656,14 @@ static int read_from_zns(zns_info *info, unsigned long long physical_addr, 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); release_transfer_size(info, type, curr_transfer_size); physical_addr += num_pages; - buffer = (char *)buffer + curr_read_size; + buffer = (uint8_t *)buffer + curr_read_size; size -= curr_read_size; } return errno; } static int append_to_data_zone(zns_info *info, zone_info *zone, - void *buffer, uint32_t size, uint8_t type) + void *buffer, uint64_t size, uint8_t type) { increase_write_ptr(zone, size / info->page_size); while (size) { @@ -674,7 +680,7 @@ static int append_to_data_zone(zns_info *info, zone_info *zone, release_transfer_size(info, type, curr_transfer_size); if (errno) return errno; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -713,7 +719,7 @@ static int append_to_log_zone(zns_info *info, unsigned long long page_addr, change_log_zone(info); page_addr += num_curr_append_pages; physical_addr += num_curr_append_pages; - buffer = (char *)buffer + curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; size -= curr_append_size; } return errno; @@ -740,7 +746,7 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, + (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); start = curr; } @@ -753,7 +759,7 @@ static int read_logical_block(zns_info *info, logical_block *block, uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (char *)buffer + buff_offset, curr_read_size, gc_read); + (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); return errno; } @@ -768,7 +774,7 @@ static void merge(zns_info *info, logical_block *block) if (block->data_zone && block->data_zone->write_ptr > size) size = block->data_zone->write_ptr; size *= info->page_size; - char buffer[size]; + uint8_t buffer[size]; memset(buffer, 0, size); read_logical_block(info, block, buffer); pthread_mutex_lock(&info->size_limit_lock); @@ -814,25 +820,18 @@ static void *garbage_collection(void *info_ptr) { zns_info *info = (zns_info *)info_ptr; uint32_t index = 0U; - while (info->run_gc) { - while (info->num_log_zones - info->num_used_log_zones > - info->gc_wmark) { - if (!info->run_gc) - return NULL; - } + for (;;) { + while (info->run_gc && + info->num_log_zones - info->num_used_log_zones > info->gc_wmark); + if (!info->num_used_log_zones) + break; logical_block *block = &info->logical_blocks[index]; while(!block->page_maps) { index = (index + 1U) % info->num_data_zones; block = &info->logical_blocks[index]; - if (!info->run_gc) - return NULL; } - if (!info->run_gc) - return NULL; // Merge logical block to data zone merge(info, block); - if (!info->run_gc) - return NULL; // Check used log zone valid counter // if zero reset and add to free zone list // Remove zone from used_log_zones @@ -840,7 +839,7 @@ static void *garbage_collection(void *info_ptr) zone_info *prev = NULL; zone_info *free = NULL; zone_info *curr = info->used_log_zones; - while (info->run_gc && curr) { + while (curr) { if (!curr->num_valid_pages) { // reset decrease_write_ptr(curr, curr->write_ptr); @@ -875,6 +874,34 @@ static void *garbage_collection(void *info_ptr) } index = (index + 1U) % info->num_data_zones; } + for (zone_info *zone = info->free_zones; zone; zone = zone->next) { + if (!zone->saddr) + return NULL; + } + logical_block *block = NULL; + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + if (info->logical_blocks[i].data_zone && + !info->logical_blocks[i].data_zone->saddr) { + block = &info->logical_blocks[i]; + break; + } + } + uint64_t size = block->data_zone->write_ptr * info->page_size; + uint8_t buffer[size]; + read_from_zns(info, block->data_zone->saddr, buffer, size, gc_read); + info->used_status &= ~gc_read; + zone_info *old_data_zone = block->data_zone; + old_data_zone->write_ptr = 0U; + nvme_zns_mgmt_send(info->fd, info->nsid, old_data_zone->saddr, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); + block->data_zone = info->free_zones; + old_data_zone->next = info->free_zones->next; + if (info->num_free_zones == 1U) + info->free_zones_tail = old_data_zone; + info->free_zones = old_data_zone; + block->data_zone->next = NULL; + append_to_data_zone(info, block->data_zone, buffer, size, gc_write); + info->used_status &= ~gc_write; return NULL; } From a67b6447a25aabb2cda9b81659e5fe8a55c00437 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 20:38:36 +0000 Subject: [PATCH 079/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index b1ffaeb..1e999c0 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -874,10 +874,12 @@ static void *garbage_collection(void *info_ptr) } index = (index + 1U) % info->num_data_zones; } + // check the first zone is free zone or not for (zone_info *zone = info->free_zones; zone; zone = zone->next) { if (!zone->saddr) return NULL; } + // find which logical block has the first zone logical_block *block = NULL; for (uint32_t i = 0U; i < info->num_data_zones; ++i) { if (info->logical_blocks[i].data_zone && @@ -886,6 +888,7 @@ static void *garbage_collection(void *info_ptr) break; } } + // clean the first zone uint64_t size = block->data_zone->write_ptr * info->page_size; uint8_t buffer[size]; read_from_zns(info, block->data_zone->saddr, buffer, size, gc_read); From 64849e3f76c3b82cf608b4f42a18249197d45b18 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 20:46:59 +0000 Subject: [PATCH 080/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 1e999c0..a8c0881 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -775,7 +775,6 @@ static void merge(zns_info *info, logical_block *block) size = block->data_zone->write_ptr; size *= info->page_size; uint8_t buffer[size]; - memset(buffer, 0, size); read_logical_block(info, block, buffer); pthread_mutex_lock(&info->size_limit_lock); info->used_status &= ~gc_read; From 6e4c79e4cda015aaae7df78bac490480e12d89ae Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sat, 15 Oct 2022 20:52:29 +0000 Subject: [PATCH 081/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index a8c0881..f4e8813 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -32,6 +32,7 @@ SOFTWARE. extern "C" { enum { + dev_write = 0x0, user_read = 0x1, gc_read = 0x2, sb_read = user_read | gc_read, From 8bd920fcdd2cb91704399987d713b15209e0701b Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 06:03:51 +0000 Subject: [PATCH 082/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index f4e8813..cfeb4b2 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -239,9 +239,9 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; info->logical_blocks[i].bitmap = (uint8_t *) - calloc(((info->num_data_zones * - info->zone_num_pages + 1UL) - >> 3UL), sizeof(uint8_t)); + calloc(((info->zone_num_pages - 1UL) + >> 3UL) + 1UL, + sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } //Start GC From 7536f349b4dda3743d96efa6c787e61bbfd74922 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 06:39:37 +0000 Subject: [PATCH 083/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index cfeb4b2..d34879b 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -407,11 +407,6 @@ int deinit_ss_zns_device(user_zns_device *my_dev) // free hashmap for (uint32_t i = 0U; i < info->num_data_zones; ++i) { // Clear all log heads for a logical block - while (blocks[i].page_maps) { - page_map *tmp = blocks[i].page_maps; - blocks[i].page_maps = blocks[i].page_maps->next; - free(tmp); - } if (blocks[i].data_zone) { pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); @@ -421,13 +416,6 @@ int deinit_ss_zns_device(user_zns_device *my_dev) pthread_mutex_destroy(&blocks[i].lock); } free(blocks); - while (info->used_log_zones) { - zone_info *tmp = info->used_log_zones; - info->used_log_zones = info->used_log_zones->next; - pthread_mutex_destroy(&tmp->num_valid_pages_lock); - pthread_mutex_destroy(&tmp->write_ptr_lock); - free(tmp); - } while (info->free_zones) { zone_info *tmp = info->free_zones; info->free_zones = info->free_zones->next; From 3e8f642d23ff9ca8988fc5492a179381ada8447c Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 07:16:33 +0000 Subject: [PATCH 084/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 143 +++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 69 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index d34879b..376f1c2 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -35,73 +35,77 @@ enum { dev_write = 0x0, user_read = 0x1, gc_read = 0x2, - sb_read = user_read | gc_read, + sb_read = user_read | gc_read, // user or gc is reading user_write = 0x10, gc_write = 0x20, - sb_write = user_write | gc_write + sb_write = user_write | gc_write // user or gc is writing }; // zone in zns typedef struct zone_info { - unsigned long long saddr; - uint32_t num_valid_pages; - uint32_t write_ptr; + unsigned long long saddr; // starting physical address + uint32_t num_valid_pages; // the number of valid pages (used for log zone) pthread_mutex_t num_valid_pages_lock; + uint32_t write_ptr; // writer pointer (used for data zone) pthread_mutex_t write_ptr_lock; - struct zone_info *next; // linked in free_zones and used_log_zones + struct zone_info *next; // linked in used_log_zones and free_zones } zone_info; // page map for log zones typedef struct page_map { - unsigned long long page_addr; - unsigned long long physical_addr; - zone_info *zone; - struct page_map *next; // page map for each logical block + unsigned long long page_addr; // logical page address + unsigned long long physical_addr; // phisical address + zone_info *zone; // the zone this page map in + struct page_map *next; } page_map; // Contains data in log zone (page map) and data in data zone (block map) typedef struct logical_block { - unsigned long long s_page_addr; + unsigned long long s_page_addr; // starting logical page address page_map *page_maps; // page mapping for this logical block (log zone) - page_map *old_page_maps; + page_map *old_page_maps; // temporily store old page maps while gc page_map *page_maps_tail; zone_info *data_zone; // block mapping for this logical block (data zone) uint8_t *bitmap; - //TODO: LOCK the access pthread_mutex_t lock; } logical_block; typedef struct zns_info { - // Values from init parameters - int num_log_zones; - int gc_wmark; - pthread_t gc_thread; - bool run_gc; - // Query the nsid for following info + // information of device int fd; unsigned nsid; uint32_t page_size; + uint32_t zone_num_pages; uint32_t num_zones; + uint32_t num_log_zones; uint32_t num_data_zones; - uint32_t zone_num_pages; - uint32_t mdts; // max data transfer size (read + append limit) - uint32_t zasl; // zone append size limit (append limit) - uint8_t used_status; + // max data transfer size (read + append limit) + uint32_t mdts; + // zone append size limit (append limit) + uint32_t zasl; + // load balancing varaible uint32_t free_transfer_size; uint32_t free_append_size; + uint8_t used_status; pthread_mutex_t size_limit_lock; - // Log zones + // logical block corresponding to each data zone + logical_block *logical_blocks; + // current log zone zone_info *curr_log_zone; - int num_used_log_zones; + // used log zone zone_info *used_log_zones; zone_info *used_log_zones_tail; + uint32_t num_used_log_zones; // Free zones - uint32_t num_free_zones; zone_info *free_zones; zone_info *free_zones_tail; - pthread_mutex_t zones_lock; // Lock for changing used_log_zone and free_zone - // logical block corresponding to each data zone - logical_block *logical_blocks; + uint32_t num_free_zones; + // Lock for changing used_log_zone and free_zone + pthread_mutex_t zones_lock; + // garbage collection variable + uint32_t gc_wmark; + bool run_gc; + pthread_t gc_thread; } zns_info; static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); @@ -137,10 +141,6 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); zns_info *info = (zns_info *)(*my_dev)->_private; - // set num_log_zones - info->num_log_zones = params->log_zones; - // set gc_wmark - info->gc_wmark = params->gc_wmark; // set fd info->fd = nvme_open(params->name); if (info->fd < 0) { @@ -153,17 +153,6 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) printf("Error: failed to retrieve the namespace id %d\n", ret); return ret; } - // reset device - if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, - NVME_ZNS_ZSA_RESET, 0U, NULL); - if (ret) { - printf("Zone reset failed %d\n", ret); - return ret; - } - } else { - ; - } // set zns_lba_size or page_size : Its same for now! nvme_id_ns ns; ret = nvme_identify_ns(info->fd, info->nsid, &ns); @@ -174,6 +163,13 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->page_size = 1U << ns.lbaf[ns.flbas & 0xF].ds; (*my_dev)->tparams.zns_lba_size = info->page_size; (*my_dev)->lba_size_bytes = info->page_size; + // set zone_num_pages + nvme_zns_id_ns data; + nvme_zns_identify_ns(info->fd, info->nsid, &data); + info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = zone_num_pages * page_size + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + info->page_size; // set num_zones nvme_zone_report zns_report; ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, @@ -186,32 +182,48 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) } info->num_zones = le64_to_cpu(zns_report.nr_zones); (*my_dev)->tparams.zns_num_zones = info->num_zones; + // set num_log_zones + info->num_log_zones = params->log_zones; // set num_data_zones = num_zones - num_log_zones info->num_data_zones = info->num_zones - info->num_log_zones; - // set zone_num_pages - nvme_zns_id_ns data; - nvme_zns_identify_ns(info->fd, info->nsid, &data); - info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; - // set zns_zone_capacity = #page_per_zone * zone_size - (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * - info->page_size; - // set user capacity bytes = #data_zones * zone_capacity + // set user capacity bytes = num_data_zones * zone_capacity (*my_dev)->capacity_bytes = (info->num_data_zones) * (*my_dev)->tparams.zns_zone_capacity; - // set max_data_transfer_size + // set max_data_transfer_size and free_transfer_size nvme_id_ctrl id0; nvme_identify_ctrl(info->fd, &id0); info->mdts = ((1U << id0.mdts) - 2U) * info->page_size; - // set zone_append_size_limit + info->free_transfer_size = info->mdts; + // set zone_append_size_limit and free_append_size nvme_zns_id_ctrl id1; nvme_zns_identify_ctrl(info->fd, &id1); info->zasl = ((1U << id1.zasl) - 2U) * info->page_size; - info->free_transfer_size = info->mdts; info->free_append_size = info->zasl; + // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - // init zones_lock - pthread_mutex_init(&info->zones_lock, NULL); - // set all zone index to free_zones + // reset device + if (params->force_reset) { + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return ret; + } + } else { + ; + } + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(((info->zone_num_pages - 1UL) + >> 3UL) + 1UL, + sizeof(uint8_t)); + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + } + // set all zone to free_zones info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); info->free_zones_tail = info->free_zones; pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); @@ -233,17 +245,10 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_zones_tail = NULL; info->curr_log_zone->next = NULL; --info->num_free_zones; - // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); - for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; - info->logical_blocks[i].bitmap = (uint8_t *) - calloc(((info->zone_num_pages - 1UL) - >> 3UL) + 1UL, - sizeof(uint8_t)); - pthread_mutex_init(&info->logical_blocks[i].lock, NULL); - } + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); + // set gc_wmark + info->gc_wmark = params->gc_wmark; //Start GC info->run_gc = true; pthread_create(&info->gc_thread, NULL, &garbage_collection, info); From 86347763c121dc5bdb7569647906be3cce78a70d Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 07:25:37 +0000 Subject: [PATCH 085/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 376f1c2..3b0f905 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -183,7 +183,7 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->num_zones = le64_to_cpu(zns_report.nr_zones); (*my_dev)->tparams.zns_num_zones = info->num_zones; // set num_log_zones - info->num_log_zones = params->log_zones; + info->num_log_zones = params->log_zones > 0 ? params->log_zones : 0U; // set num_data_zones = num_zones - num_log_zones info->num_data_zones = info->num_zones - info->num_log_zones; // set user capacity bytes = num_data_zones * zone_capacity @@ -248,7 +248,7 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) // init zones_lock pthread_mutex_init(&info->zones_lock, NULL); // set gc_wmark - info->gc_wmark = params->gc_wmark; + info->gc_wmark = params->gc_wmark > 0 ? params->gc_wmark : 0U; //Start GC info->run_gc = true; pthread_create(&info->gc_thread, NULL, &garbage_collection, info); From 36cc0856a82c5b03941fcb7026fcf8045916f380 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 07:29:45 +0000 Subject: [PATCH 086/101] clean all log zone and keep the first zone clean --- src/m23-ftl/zns_device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 3b0f905..ce786c5 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -302,8 +302,8 @@ int zns_udevice_read(user_zns_device *my_dev, uint64_t address, start->page_addr + 1ULL) * info->page_size; read_from_zns(info, start->physical_addr, - (uint8_t *)buffer + buff_offset, curr_read_size, - user_read); + (uint8_t *)buffer + buff_offset, + curr_read_size, user_read); start = curr; } prev = curr; From 9770f116035da8eba1fc3ea977a1dab4bd015474 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 12:43:43 +0000 Subject: [PATCH 087/101] not yet restore data --- src/m1/device.cpp | 9 +-- src/m1/device.h | 2 + src/m23-ftl/zns_device.cpp | 114 ++++++++++++++++++++++++------------- 3 files changed, 82 insertions(+), 43 deletions(-) diff --git a/src/m1/device.cpp b/src/m1/device.cpp index bcaa22d..5412ec6 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include +#include #include #include #include @@ -216,7 +217,7 @@ int show_zns_zone_status(const int &fd, const unsigned &nsid, num_zones * sizeof(nvme_zns_desc); std::unique_ptr zone_reports(new char[total_size]()); ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, - NVME_ZNS_ZRAS_REPORT_ALL, true, + NVME_ZNS_ZRAS_REPORT_ALL, false, total_size, zone_reports.get()); if (ret) { std::cerr << "failed to report zones, ret " << ret << std::endl; @@ -263,8 +264,8 @@ int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, { //FIXME: while (buf_size) { - unsigned size = buf_size < (mdts_size - 2U) * lba_size ? - buf_size : (mdts_size - 2U) * lba_size; + unsigned size = buf_size < (mdts_size - 1U) * lba_size ? + buf_size : (mdts_size - 1U) * lba_size; unsigned short no_blocks = size / lba_size; if (read) ss_nvme_device_read(fd, nsid, slba, no_blocks, buffer, size); diff --git a/src/m1/device.h b/src/m1/device.h index 7fe1173..73eb26d 100644 --- a/src/m1/device.h +++ b/src/m1/device.h @@ -24,6 +24,8 @@ SOFTWARE. #ifndef STOSYS_PROJECT_DEVICE_H #define STOSYS_PROJECT_DEVICE_H +#include + extern "C" { // we will use an ss_ extension // to differentiate our struct definitions from the standard library diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index ce786c5..532338a 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -25,6 +25,7 @@ SOFTWARE. #include #include #include +#include #include #include #include "zns_device.h" @@ -32,7 +33,7 @@ SOFTWARE. extern "C" { enum { - dev_write = 0x0, + dev_io = 0x0, user_read = 0x1, gc_read = 0x2, sb_read = user_read | gc_read, // user or gc is reading @@ -62,11 +63,11 @@ typedef struct page_map { // Contains data in log zone (page map) and data in data zone (block map) typedef struct logical_block { unsigned long long s_page_addr; // starting logical page address + uint8_t *bitmap; page_map *page_maps; // page mapping for this logical block (log zone) page_map *old_page_maps; // temporily store old page maps while gc page_map *page_maps_tail; zone_info *data_zone; // block mapping for this logical block (data zone) - uint8_t *bitmap; pthread_mutex_t lock; } logical_block; @@ -90,6 +91,7 @@ typedef struct zns_info { pthread_mutex_t size_limit_lock; // logical block corresponding to each data zone logical_block *logical_blocks; + uint32_t bitmap_size; // current log zone zone_info *curr_log_zone; // used log zone @@ -171,16 +173,18 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * info->page_size; // set num_zones - nvme_zone_report zns_report; + unsigned zns_report_size = sizeof(nvme_zone_report) + sizeof(nvme_zns_desc); + nvme_zone_report *zns_report = (nvme_zone_report *)calloc(1UL, + zns_report_size); ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, false, - sizeof(zns_report), &zns_report); + zns_report_size, zns_report); if (ret) { printf("Failed to report zones, ret %d\n", ret); return ret; } - info->num_zones = le64_to_cpu(zns_report.nr_zones); + info->num_zones = le64_to_cpu(zns_report->nr_zones); (*my_dev)->tparams.zns_num_zones = info->num_zones; // set num_log_zones info->num_log_zones = params->log_zones > 0 ? params->log_zones : 0U; @@ -201,28 +205,6 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_append_size = info->zasl; // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - // reset device - if (params->force_reset) { - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, - NVME_ZNS_ZSA_RESET, 0U, NULL); - if (ret) { - printf("Zone reset failed %d\n", ret); - return ret; - } - } else { - ; - } - // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); - for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; - info->logical_blocks[i].bitmap = (uint8_t *) - calloc(((info->zone_num_pages - 1UL) - >> 3UL) + 1UL, - sizeof(uint8_t)); - pthread_mutex_init(&info->logical_blocks[i].lock, NULL); - } // set all zone to free_zones info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); info->free_zones_tail = info->free_zones; @@ -236,17 +218,49 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); } - // set num_free_zones - info->num_free_zones = info->num_zones; - //Set current log zone to 0th zone - info->curr_log_zone = info->free_zones; - info->free_zones = info->free_zones->next; - if (!info->free_zones) - info->free_zones_tail = NULL; - info->curr_log_zone->next = NULL; - --info->num_free_zones; - // init zones_lock - pthread_mutex_init(&info->zones_lock, NULL); + // reset device + if (params->force_reset) { + free(zns_report); + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return ret; + } + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(info->bitmap_size, + sizeof(uint8_t)); + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + } + // set num_free_zones + info->num_free_zones = info->num_zones; + //Set current log zone to 0th zone + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; + info->curr_log_zone->next = NULL; + --info->num_free_zones; + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); + } else { + uint32_t read_size = le64_to_cpu(zns_report->entries[0].wp) * + info->page_size; + free(zns_report); + uint8_t *blocks_info = (uint8_t *)calloc(read_size, sizeof(uint8_t)); + read_from_zns(info, 0ULL, blocks_info, read_size, dev_io); + nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); + uint8_t *ptr = blocks_info; + std::unordered_set used_zones_index; + free(blocks_info); + } // set gc_wmark info->gc_wmark = params->gc_wmark > 0 ? params->gc_wmark : 0U; //Start GC @@ -408,19 +422,41 @@ int deinit_ss_zns_device(user_zns_device *my_dev) pthread_mutex_unlock(&info->zones_lock); info->run_gc = false; pthread_join(info->gc_thread, NULL); + uint64_t block_info_size = info->bitmap_size + sizeof(uint8_t) + + sizeof(zone_info::saddr) + + sizeof(zone_info::write_ptr); + uint64_t append_size = ((info->num_data_zones * block_info_size - 1UL) / + info->page_size + 1UL) * info->page_size; + uint8_t *blocks_info = (uint8_t *)calloc(1UL, append_size); + uint8_t *ptr = blocks_info; logical_block *blocks = info->logical_blocks; // free hashmap for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + memcpy(ptr, blocks[i].bitmap, info->bitmap_size); + free(blocks[i].bitmap); + ptr += info->bitmap_size; // Clear all log heads for a logical block if (blocks[i].data_zone) { pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); + memset(ptr, 1, sizeof(uint8_t)); + ptr += sizeof(uint8_t); + memcpy(ptr, &blocks[i].data_zone->saddr, sizeof(zone_info::saddr)); + ptr += sizeof(zone_info::saddr); + memcpy(ptr, &blocks[i].data_zone->write_ptr, + sizeof(zone_info::write_ptr)); + ptr += sizeof(zone_info::write_ptr); free(blocks[i].data_zone); + } else { + ptr += sizeof(bool) + sizeof(zone_info::saddr) + + sizeof(zone_info::write_ptr); } - free(blocks[i].bitmap); pthread_mutex_destroy(&blocks[i].lock); } free(blocks); + append_to_data_zone(info, info->free_zones, + blocks_info, append_size, dev_io); + free(blocks_info); while (info->free_zones) { zone_info *tmp = info->free_zones; info->free_zones = info->free_zones->next; From 7e633a2fa2b1c74a6d55e1337afc34e190e52c20 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 14:38:29 +0000 Subject: [PATCH 088/101] ftl done --- src/m23-ftl/zns_device.cpp | 157 ++++++++++++++++++++++++++++--------- 1 file changed, 118 insertions(+), 39 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 532338a..5b2dd43 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -33,13 +33,14 @@ SOFTWARE. extern "C" { enum { - dev_io = 0x0, user_read = 0x1, gc_read = 0x2, - sb_read = user_read | gc_read, // user or gc is reading + dev_read = 0x4, + sb_read = user_read | gc_read | dev_read, // user or gc is reading user_write = 0x10, gc_write = 0x20, - sb_write = user_write | gc_write // user or gc is writing + dev_write = 0x40, + sb_write = user_write | gc_write | dev_write // user or gc is writing }; // zone in zns @@ -205,19 +206,6 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_append_size = info->zasl; // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - // set all zone to free_zones - info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); - info->free_zones_tail = info->free_zones; - pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); - for (uint32_t i = 1U; i < info->num_zones; ++i) { - info->free_zones_tail->next = (zone_info *)calloc(1UL, - sizeof(zone_info)); - info->free_zones_tail = info->free_zones_tail->next; - info->free_zones_tail->saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); - } // reset device if (params->force_reset) { free(zns_report); @@ -229,7 +217,7 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) } // set log zone page mapped hashmap size to num_data_zones info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); + sizeof(logical_block)); info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; @@ -238,29 +226,102 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } + // set all zone to free_zones + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); + for (uint32_t i = 1U; i < info->num_zones; ++i) { + info->free_zones_tail->next = (zone_info *) + calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones_tail->next; + info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, + NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); + } // set num_free_zones info->num_free_zones = info->num_zones; - //Set current log zone to 0th zone - info->curr_log_zone = info->free_zones; - info->free_zones = info->free_zones->next; - if (!info->free_zones) - info->free_zones_tail = NULL; - info->curr_log_zone->next = NULL; - --info->num_free_zones; - // init zones_lock - pthread_mutex_init(&info->zones_lock, NULL); } else { uint32_t read_size = le64_to_cpu(zns_report->entries[0].wp) * info->page_size; free(zns_report); uint8_t *blocks_info = (uint8_t *)calloc(read_size, sizeof(uint8_t)); - read_from_zns(info, 0ULL, blocks_info, read_size, dev_io); + read_from_zns(info, 0ULL, blocks_info, read_size, dev_read); nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; uint8_t *ptr = blocks_info; std::unordered_set used_zones_index; + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(info->bitmap_size, + sizeof(uint8_t)); + memcpy(info->logical_blocks[i].bitmap, ptr, info->bitmap_size); + ptr += info->bitmap_size; + if (*ptr) { + ptr += sizeof(uint8_t); + info->logical_blocks[i].data_zone = (zone_info *) + calloc(1UL, + sizeof(zone_info)); + memcpy(&info->logical_blocks[i].data_zone->saddr, ptr, + sizeof(zone_info::saddr)); + ptr += sizeof(zone_info::saddr); + memcpy(&info->logical_blocks[i].data_zone->write_ptr, ptr, + sizeof(zone_info::write_ptr)); + ptr += sizeof(zone_info::write_ptr); + pthread_mutex_init( + &info->logical_blocks[i].data_zone->num_valid_pages_lock, + NULL + ); + pthread_mutex_init( + &info->logical_blocks[i].data_zone->write_ptr_lock, + NULL + ); + used_zones_index.emplace( + info->logical_blocks[i].data_zone->saddr / + info->zone_num_pages + ); + } else { + ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + + sizeof(zone_info::write_ptr); + } + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + } free(blocks_info); + // set rest zone to free_zones + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); + for (uint32_t i = 1U; i < info->num_zones; ++i) { + if (!used_zones_index.count(i)) { + info->free_zones_tail->next = (zone_info *) + calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones_tail->next; + info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, + NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, + NULL); + } + } + // set num_free_zones + info->num_free_zones = info->num_zones - used_zones_index.size(); } + //Set current log zone to 0th zone + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; + info->curr_log_zone->next = NULL; + --info->num_free_zones; + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); // set gc_wmark info->gc_wmark = params->gc_wmark > 0 ? params->gc_wmark : 0U; //Start GC @@ -411,15 +472,26 @@ int deinit_ss_zns_device(user_zns_device *my_dev) { zns_info *info = (zns_info *)my_dev->_private; // Kill gc - pthread_mutex_lock(&info->zones_lock); - if (info->used_log_zones) - info->used_log_zones_tail->next = info->curr_log_zone; - else - info->used_log_zones = info->curr_log_zone; - info->used_log_zones_tail = info->curr_log_zone; - info->curr_log_zone = NULL; - ++info->num_used_log_zones; - pthread_mutex_unlock(&info->zones_lock); + if (info->curr_log_zone->write_ptr) { + pthread_mutex_lock(&info->zones_lock); + if (info->used_log_zones) + info->used_log_zones_tail->next = info->curr_log_zone; + else + info->used_log_zones = info->curr_log_zone; + info->used_log_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; + ++info->num_used_log_zones; + pthread_mutex_unlock(&info->zones_lock); + } else { + pthread_mutex_lock(&info->zones_lock); + if (info->free_zones) + info->free_zones_tail->next = info->curr_log_zone; + else + info->free_zones = info->curr_log_zone; + info->free_zones_tail = info->curr_log_zone; + ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); + } info->run_gc = false; pthread_join(info->gc_thread, NULL); uint64_t block_info_size = info->bitmap_size + sizeof(uint8_t) + @@ -455,7 +527,7 @@ int deinit_ss_zns_device(user_zns_device *my_dev) } free(blocks); append_to_data_zone(info, info->free_zones, - blocks_info, append_size, dev_io); + blocks_info, append_size, dev_write); free(blocks_info); while (info->free_zones) { zone_info *tmp = info->free_zones; @@ -904,9 +976,16 @@ static void *garbage_collection(void *info_ptr) index = (index + 1U) % info->num_data_zones; } // check the first zone is free zone or not - for (zone_info *zone = info->free_zones; zone; zone = zone->next) { - if (!zone->saddr) + if (!info->free_zones->saddr) + return NULL; + for (zone_info *prev = info->free_zones, *zone = prev->next; zone; prev = zone, zone = zone->next) { + if (!zone->saddr) { + zone_info *tmp = zone; + prev->next = zone->next; + tmp->next = info->free_zones; + info->free_zones = tmp; return NULL; + } } // find which logical block has the first zone logical_block *block = NULL; From 8d25b114c1f4d04ea4ac290a7813cb4a720f64e4 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 15:34:49 +0000 Subject: [PATCH 089/101] ftl done! --- src/m23-ftl/zns_device.cpp | 160 ++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 90 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 5b2dd43..84cc75a 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -186,6 +186,9 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) return ret; } info->num_zones = le64_to_cpu(zns_report->nr_zones); + uint32_t blocks_info_size = le64_to_cpu(zns_report->entries[0].wp) * + info->page_size; + free(zns_report); (*my_dev)->tparams.zns_num_zones = info->num_zones; // set num_log_zones info->num_log_zones = params->log_zones > 0 ? params->log_zones : 0U; @@ -206,19 +209,20 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_append_size = info->zasl; // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - // reset device + std::unordered_set used_zones_index; + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; if (params->force_reset) { - free(zns_report); + // reset device ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, NVME_ZNS_ZSA_RESET, 0U, NULL); if (ret) { printf("Zone reset failed %d\n", ret); return ret; } - // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); - info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; + // set logical block for (uint32_t i = 0U; i < info->num_data_zones; ++i) { info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; info->logical_blocks[i].bitmap = (uint8_t *) @@ -226,93 +230,65 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) sizeof(uint8_t)); pthread_mutex_init(&info->logical_blocks[i].lock, NULL); } - // set all zone to free_zones - info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); - info->free_zones_tail = info->free_zones; - pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); - for (uint32_t i = 1U; i < info->num_zones; ++i) { - info->free_zones_tail->next = (zone_info *) - calloc(1UL, sizeof(zone_info)); - info->free_zones_tail = info->free_zones_tail->next; - info->free_zones_tail->saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, - NULL); - pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, NULL); - } - // set num_free_zones - info->num_free_zones = info->num_zones; } else { - uint32_t read_size = le64_to_cpu(zns_report->entries[0].wp) * - info->page_size; - free(zns_report); - uint8_t *blocks_info = (uint8_t *)calloc(read_size, sizeof(uint8_t)); - read_from_zns(info, 0ULL, blocks_info, read_size, dev_read); + uint8_t *blocks_info = (uint8_t *)calloc(blocks_info_size, + sizeof(uint8_t)); + // read blocks information + read_from_zns(info, 0ULL, blocks_info, blocks_info_size, dev_read); + info->used_status &= ~dev_read; + // reset the first zone nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, false, NVME_ZNS_ZSA_RESET, 0U, NULL); - // set log zone page mapped hashmap size to num_data_zones - info->logical_blocks = (logical_block *)calloc(info->num_data_zones, - sizeof(logical_block)); - info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; + // set logical block uint8_t *ptr = blocks_info; - std::unordered_set used_zones_index; for (uint32_t i = 0U; i < info->num_data_zones; ++i) { - info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; - info->logical_blocks[i].bitmap = (uint8_t *) - calloc(info->bitmap_size, - sizeof(uint8_t)); - memcpy(info->logical_blocks[i].bitmap, ptr, info->bitmap_size); + logical_block *block = &info->logical_blocks[i]; + block->s_page_addr = i * info->zone_num_pages; + block->bitmap = (uint8_t *)calloc(info->bitmap_size, + sizeof(uint8_t)); + memcpy(block->bitmap, ptr, info->bitmap_size); ptr += info->bitmap_size; + // if logical block has a data zone if (*ptr) { ptr += sizeof(uint8_t); - info->logical_blocks[i].data_zone = (zone_info *) - calloc(1UL, - sizeof(zone_info)); - memcpy(&info->logical_blocks[i].data_zone->saddr, ptr, - sizeof(zone_info::saddr)); + block->data_zone = (zone_info *)calloc(1UL, sizeof(zone_info)); + memcpy(&block->data_zone->saddr, ptr, sizeof(zone_info::saddr)); ptr += sizeof(zone_info::saddr); - memcpy(&info->logical_blocks[i].data_zone->write_ptr, ptr, + memcpy(&block->data_zone->write_ptr, ptr, sizeof(zone_info::write_ptr)); ptr += sizeof(zone_info::write_ptr); - pthread_mutex_init( - &info->logical_blocks[i].data_zone->num_valid_pages_lock, - NULL - ); - pthread_mutex_init( - &info->logical_blocks[i].data_zone->write_ptr_lock, - NULL - ); - used_zones_index.emplace( - info->logical_blocks[i].data_zone->saddr / - info->zone_num_pages - ); + pthread_mutex_init(&block->data_zone->num_valid_pages_lock, + NULL); + pthread_mutex_init(&block->data_zone->write_ptr_lock, NULL); + used_zones_index.emplace(block->data_zone->saddr / + info->zone_num_pages); } else { ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + sizeof(zone_info::write_ptr); } - pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + pthread_mutex_init(&block->lock, NULL); } free(blocks_info); - // set rest zone to free_zones - info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); - info->free_zones_tail = info->free_zones; - pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); - pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); - for (uint32_t i = 1U; i < info->num_zones; ++i) { - if (!used_zones_index.count(i)) { - info->free_zones_tail->next = (zone_info *) - calloc(1UL, sizeof(zone_info)); - info->free_zones_tail = info->free_zones_tail->next; - info->free_zones_tail->saddr = i * info->zone_num_pages; - pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, - NULL); - pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, - NULL); - } + } + // set rest zone to free_zones + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); + for (uint32_t i = 1U; i < info->num_zones; ++i) { + if (!used_zones_index.count(i)) { + info->free_zones_tail->next = (zone_info *) + calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones_tail->next; + info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, + NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, + NULL); } - // set num_free_zones - info->num_free_zones = info->num_zones - used_zones_index.size(); } + // set num_free_zones + info->num_free_zones = info->num_zones - used_zones_index.size(); //Set current log zone to 0th zone info->curr_log_zone = info->free_zones; info->free_zones = info->free_zones->next; @@ -529,6 +505,7 @@ int deinit_ss_zns_device(user_zns_device *my_dev) append_to_data_zone(info, info->free_zones, blocks_info, append_size, dev_write); free(blocks_info); + info->used_status &= ~dev_write; while (info->free_zones) { zone_info *tmp = info->free_zones; info->free_zones = info->free_zones->next; @@ -938,7 +915,6 @@ static void *garbage_collection(void *info_ptr) // Remove zone from used_log_zones // if valid_page is zero and add that zone to free zones list zone_info *prev = NULL; - zone_info *free = NULL; zone_info *curr = info->used_log_zones; while (curr) { if (!curr->num_valid_pages) { @@ -947,26 +923,25 @@ static void *garbage_collection(void *info_ptr) nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); pthread_mutex_lock(&info->zones_lock); - // Remove from used_log_zones - free = curr; + // Remove from used_log_zones and add to free_zones + if (info->free_zones) + info->free_zones_tail->next = curr; + else + info->free_zones = curr; + info->free_zones_tail = curr; + ++info->num_free_zones; curr = curr->next; + info->free_zones_tail->next = NULL; if (prev) { prev->next = curr; - if (free == info->used_log_zones_tail) + if (info->free_zones_tail == info->used_log_zones_tail) info->used_log_zones_tail = prev; } else { info->used_log_zones = curr; if (!info->used_log_zones) info->used_log_zones_tail = NULL; } - free->next = NULL; --info->num_used_log_zones; - if (info->free_zones) - info->free_zones_tail->next = free; - else - info->free_zones = free; - info->free_zones_tail = free; - ++info->num_free_zones; pthread_mutex_unlock(&info->zones_lock); } else { prev = curr; @@ -978,12 +953,12 @@ static void *garbage_collection(void *info_ptr) // check the first zone is free zone or not if (!info->free_zones->saddr) return NULL; - for (zone_info *prev = info->free_zones, *zone = prev->next; zone; prev = zone, zone = zone->next) { + for (zone_info *prev = info->free_zones, *zone = prev->next; + zone; prev = zone, zone = zone->next) { if (!zone->saddr) { - zone_info *tmp = zone; prev->next = zone->next; - tmp->next = info->free_zones; - info->free_zones = tmp; + zone->next = info->free_zones; + info->free_zones = zone; return NULL; } } @@ -998,20 +973,25 @@ static void *garbage_collection(void *info_ptr) } // clean the first zone uint64_t size = block->data_zone->write_ptr * info->page_size; - uint8_t buffer[size]; + uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t)); + // read data from the first zone read_from_zns(info, block->data_zone->saddr, buffer, size, gc_read); info->used_status &= ~gc_read; zone_info *old_data_zone = block->data_zone; + // reset the first zone old_data_zone->write_ptr = 0U; nvme_zns_mgmt_send(info->fd, info->nsid, old_data_zone->saddr, false, NVME_ZNS_ZSA_RESET, 0U, NULL); + // swap info->free_zones and block->data_zone block->data_zone = info->free_zones; old_data_zone->next = info->free_zones->next; if (info->num_free_zones == 1U) info->free_zones_tail = old_data_zone; info->free_zones = old_data_zone; block->data_zone->next = NULL; + // append data to new data zone append_to_data_zone(info, block->data_zone, buffer, size, gc_write); + free(buffer); info->used_status &= ~gc_write; return NULL; } From 8ee1aac5191749739a2e8be6f56d99e3a995a41e Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Sun, 16 Oct 2022 21:35:06 +0000 Subject: [PATCH 090/101] [WIP] persistent FS layer --- src/m45-rocksdb/S2FileSystem.cc | 1043 +++++++++++++++++++++++++++++-- src/m45-rocksdb/S2FileSystem.h | 221 ++++++- src/m45-rocksdb/rocks_s2fs.cc | 2 +- 3 files changed, 1207 insertions(+), 59 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 623aa5d..3afefe3 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -28,8 +28,441 @@ SOFTWARE. #include #include -namespace ROCKSDB_NAMESPACE { - S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) { +namespace ROCKSDB_NAMESPACE +{ + int LookupMap_HashFunction(std::string id) + { + unsigned hashindex; + char *ptr = const_cast(id.c_str()); + for (hashindex = 0; *ptr != '\0'; ptr++) + hashindex = *ptr + STRINGENCODE * hashindex; + return hashindex % LOOKUP_MAP_SIZE; + } + + int LookupMap_Insert(MYFS *FSObj, std::string id, Inode *ptr) + { + int index = LookupMap_HashFunction(id); + + mapEntries *map = (mapEntries *)calloc(1, sizeof(mapEntries)); + strcpy(map->id,id.c_str()); + map->ptr = ptr; + map->chain = NULL; + + if (FSObj->LookupCache[index] == NULL) + FSObj->LookupCache[index] = map; + else + { + struct mapEntries *head; + head = FSObj->LookupCache[index]; + while (head->chain != NULL) + head = head->chain; + head->chain = map; + } + + return 0; + } + + int LookupMap_Delete(MYFS *FSObj, std::string id) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head, *tmp = NULL; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (!strcmp(head->id,id.c_str())) + { + if (tmp == NULL) + FSObj->LookupCache[index] = head->chain; + else + tmp->chain = head->chain; + free(head); + break; + } + tmp = head; + head = head->chain; + } + + return 0; + } + + int LookupMap_Lookup(MYFS *FSObj, std::string id, Inode **ptr) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (!strcmp(head->id,id.c_str())) + break; + head = head->chain; + } + + if (head == NULL) + return -1; + + *ptr = head->ptr; + return 0; + } + + int Load_From_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) + { + // Check the size if quantization of LBA + int err = zns_udevice_read(FSObj->zns, addr, buffer, size); + std::cout<<"Load nvm err : "<zns, addr, buffer, size); + return 0; + } + + uint32_t get_FreeInode(MYFS *FSObj) + { + uint32_t ptr = (FSObj->InodePtr + 1) % MAX_INODE_COUNT; + while (ptr != FSObj->InodePtr) + { + if (!FSObj->InodeBitMap[ptr]) + { + FSObj->InodePtr = ptr; + return ptr; + } + ptr = (ptr + 1) % MAX_INODE_COUNT; + } + return 0; + } + + uint64_t get_FreeDataBlock(MYFS *FSObj) + { + uint64_t ptr = (FSObj->DataBlockPtr + 1) % FSObj->DataBlockCount; + while (ptr != FSObj->DataBlockPtr) + { + if (!FSObj->DataBitMap[ptr]) + { + FSObj->DataBlockPtr = ptr; + return (ptr + DATA_BLOCKS_OFFSET) * FSObj->LogicalBlockSize; + } + ptr = (ptr + 1) % FSObj->DataBlockCount; + } + return 0; + } + + void free_DataBlock(MYFS *FSObj, uint64_t addr) + { + int index = (addr / FSObj->LogicalBlockSize) - DATA_BLOCKS_OFFSET; + FSObj->DataBitMap[index] = false; + } + + // Trim till /../path in /../path/name + void Get_ParentPath(std::string path, std::string &parent) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { + index = i; + break; + } + } + // Trim if additional slash is present + if (path[index - 1] == '/') + index--; + + parent = path.substr(0, index); + } + + // Trim /../path/name to name + void Get_EntityName(std::string path, std::string &entityName) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { + index = i; + break; + } + } + entityName = path.substr(index + 1, path.size()); + } + + void Clean_Path(std::string path, std::string &newPath) + { + std::string entity; + Get_EntityName(path, entity); + Get_ParentPath(path, newPath); + newPath.append("/"); + newPath.append(entity); + } + + // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool + // return value will be 0 if asked child is not present + uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren, std::string targetName = "") + { + + // Check no of children and load it + uint64_t children_count = ptr->FileSize; + + MYFS_Dir *dir_ptr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); + for (int i = 0; i < children_count / 16; i++) + { + Load_From_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + for (int j = 0; j < 16; j++) + { + if (loadChildren) + children->push_back(dir_ptr->Entities[j].EntityName); + else + { + if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) + { + if (targetName == "") + { + uint32_t ret = dir_ptr->Entities[j].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[j].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + free(dir_ptr); + return 0; + } + } + } + } + } + + Load_From_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + for (int i = 0; i < children_count % 16; i++) + { + if (loadChildren) + children->push_back(dir_ptr->Entities[i].EntityName); + else + { + if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) + { + if (targetName == "") + { + uint32_t ret = dir_ptr->Entities[i].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[i].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + free(dir_ptr); + return 0; + } + } + } + } + free(dir_ptr); + return 0; + } + + // A recursive call to load inode of the given path to lookupmap + // Stores the inode ptr as well, returns 0 in success + int Get_Path_Inode(MYFS *FSObj, std::string path, Inode **ptr) + { + if (path == "/tmp") + { + *ptr = FSObj->rootEntry; + return 0; + } + + // Check if path in lookupMap cache + int isPresent = LookupMap_Lookup(FSObj, path, ptr); + if (!isPresent) + return 0; + + // if not : Get_Path_Inode for parent dir + std::string parent; + Inode *parentInode; + Get_ParentPath(path, parent); + isPresent = Get_Path_Inode(FSObj, parent, &parentInode); + if (isPresent) + return -1; + // Read parent dir and get asked inode number + if (parentInode->FileSize == 0) + return -1; + // Get Entity to search for + std::string entityName; + Get_EntityName(path, entityName); + uint32_t index = Load_Children(FSObj, parentInode, entityName, NULL, false); + if (!index) + return -1; + + // Load the children index inode from disk and store in lookupMap; + uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; + ptr = (Inode **)calloc(1, sizeof(Inode)); + isPresent = Load_From_NVM(FSObj, address, ptr, (uint64_t)INODE_SIZE); + if (!isPresent) + return -1; + + // Put it in lookup Map + LookupMap_Insert(FSObj, path, *ptr); + return 0; + } + + int Rename_Child_In_Parent(MYFS *FSObj, std::string Ppath, std::string targetName, std::string srcName) + { + // FIXME: Logic for rename + Inode *parentInode; + int isPresent = Get_Path_Inode(FSObj, Ppath, &parentInode); + uint32_t rename = Load_Children(FSObj, parentInode, srcName, NULL, false, targetName); + return rename; + } + + int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) + { + // FIXME: Logic for deletion + Inode *ptr; + int isPresent = Get_Path_Inode(FSObj, Ppath, &ptr); + if (isPresent) + return -1; + + MYFS_DirData dirDataptr; + strcpy(dirDataptr.EntityName, childName.c_str()); + dirDataptr.InodeNum = childInode; + + MYFS_Dir *dirPtr; + dirPtr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); + int index = (++ptr->FileSize) / 16; + uint64_t addr = ptr->Direct_data_lbas[index]; + + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + ptr->Direct_data_lbas[index] = addr; + } + else + { + index = Load_From_NVM(FSObj, addr, dirPtr, 4096); + if (index) + return -1; + } + + index = ptr->FileSize % 16; + dirPtr->Entities[index - 1] = dirDataptr; + Store_To_NVM(FSObj, addr, dirPtr, 4096); + free(dirPtr); + + return 0; + } + /* + void MYFS_DeletePath(MYFS *FSObj, std::string path) + { + Inode *ptr; + int isPresent = Get_Path_Inode(FSObj, path, &ptr); + if (isPresent) + return; + // TODO: Handle logic if dir + // Free data block of inode as well! + + // Update Parent + std::string ppath; + Get_ParentPath(path, ppath); + // Delete from lookup map + } + */ + int MYFS_CreateFile(MYFS *FSObj, std::string path) + { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); + // Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName, entityName.c_str()); + ptr->Inode_no = inode_no; + + // Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + // Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + int MYFS_CreateDir(MYFS *FSObj, std::string path) + { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); + // Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName, entityName.c_str()); + ptr->IsDir = true; + ptr->Inode_no = inode_no; + + // Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + // Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + int initFS(MYFS *FSObj, user_zns_device *zns) + { + FSObj->zns = zns; + FSObj->FileSystemCapacity = zns->capacity_bytes; + FSObj->LogicalBlockSize = zns->lba_size_bytes; + // We reserve a single block as super block and MAX_INODE_COUNT as + FSObj->DataBlockCount = (FSObj->FileSystemCapacity / FSObj->LogicalBlockSize - (MAX_INODE_COUNT + 1)); + + FSObj->rootEntry = (Inode *)calloc(1, sizeof(Inode)); + FSObj->DataBitMap = (bool *)calloc(FSObj->DataBlockCount, sizeof(bool)); + + // this->FileSystemObj->LookupCache = (mapEntries *) calloc(LOOKUP_MAP_SIZE, sizeof(mapEntries)); + void *ptr = (void *) calloc(1, SUPER_BLOCK_SIZE); + Load_From_NVM(FSObj, 0, ptr, SUPER_BLOCK_SIZE); + struct SuperBlock *sb = (SuperBlock *) ptr; + //memcpy(sb, ptr, sizeof(SuperBlock)); + + if(!sb->persistent) { + //Not stored in disk + FSObj->DataBlockPtr = 0; // Reserved for Root Node + FSObj->InodePtr = 0; + FSObj->InodeBitMap[0] = true; + *(FSObj->DataBitMap) = true; + + //Do the following only if already not present + strcpy(FSObj->rootEntry->EntityName, "tmp"); + FSObj->rootEntry->IsDir = true; + FSObj->rootEntry->Inode_no = 0; + FSObj->rootEntry->FileSize = 0; + FSObj->rootEntry->Direct_data_lbas[0] = DATA_BLOCKS_OFFSET * FSObj->LogicalBlockSize; + } else { + //Load root inode; Stored in disk + Load_From_NVM(FSObj, SUPER_BLOCK_SIZE, FSObj->rootEntry, INODE_SIZE); + FSObj->DataBlockPtr = sb->dataBlockPtr; + FSObj->InodePtr = sb->inodeBlockPtr; + memcpy(FSObj->InodeBitMap, ptr+sizeof(SuperBlock), sizeof(FSObj->InodeBitMap)); + memcpy(FSObj->DataBitMap, ptr+sizeof(SuperBlock)+sizeof(FSObj->InodeBitMap), FSObj->DataBlockCount); + } + free(ptr); + //free(sb); + return 0; + } + + S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) + { FileSystem::Default(); std::string sdelimiter = ":"; std::string edelimiter = "://"; @@ -42,8 +475,9 @@ namespace ROCKSDB_NAMESPACE { params.name = strdup(device.c_str()); params.log_zones = 3; params.gc_wmark = 1; - params.force_reset = true; + params.force_reset = false; int ret = init_ss_zns_device(¶ms, &this->_zns_dev); + free(params.name); if(ret != 0){ std::cout << "Error: " << uri_db_path << " failed to open the device " << device.c_str() << "\n"; std::cout << "Error: ret " << ret << "\n"; @@ -53,9 +487,46 @@ namespace ROCKSDB_NAMESPACE { assert(this->_zns_dev->capacity_bytes != 0); ss_dprintf(DBG_FS_1, "device %s is opened and initialized, reported LBA size is %u and capacity %lu \n", device.c_str(), this->_zns_dev->lba_size_bytes, this->_zns_dev->capacity_bytes); + + // INIT File System + this->FileSystemObj = (MYFS *)calloc(1, sizeof(MYFS)); + initFS(this->FileSystemObj, this->_zns_dev); } - S2FileSystem::~S2FileSystem() { + S2FileSystem::~S2FileSystem() + { + Store_To_NVM(this->FileSystemObj, SUPER_BLOCK_SIZE, this->FileSystemObj->rootEntry, INODE_SIZE); + free(this->FileSystemObj->rootEntry); + + //Store all inodes from lookup cache to disk + for(int i=0;iFileSystemObj->LookupCache[i], *tmp; + while(head!=NULL) { + tmp = head; + head = head->chain; + Store_To_NVM(this->FileSystemObj, (tmp->ptr->Inode_no * INODE_SIZE) + SUPER_BLOCK_SIZE, tmp->ptr, INODE_SIZE); + free(tmp->ptr); + free(tmp); + } + } + + void *superBlockWBitMap = (void *) calloc(1,SUPER_BLOCK_SIZE); + struct SuperBlock *sb = (SuperBlock *) calloc(1, sizeof(SuperBlock)); + sb->dataBlockPtr = this->FileSystemObj->DataBlockPtr; + sb->inodeBlockPtr = this->FileSystemObj->InodePtr; + sb->persistent = true; + std::cout<<"Inode count : "<FileSystemObj->DataBlockCount<FileSystemObj->InodeBitMap, MAX_INODE_COUNT); + memcpy(superBlockWBitMap+sizeof(SuperBlock)+MAX_INODE_COUNT, this->FileSystemObj->DataBitMap, this->FileSystemObj->DataBlockCount); + Store_To_NVM(this->FileSystemObj, 0, superBlockWBitMap, SUPER_BLOCK_SIZE); + free(superBlockWBitMap); + free(sb); + free(this->FileSystemObj->DataBitMap); + + deinit_ss_zns_device(this->FileSystemObj->zns); + free(this->FileSystemObj); + } // Create a brand new sequentially-readable file with the specified name. @@ -65,11 +536,22 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_SequentialFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) { + IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } @@ -81,11 +563,22 @@ namespace ROCKSDB_NAMESPACE { // // The returned file may be concurrently accessed by multiple threads. IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_RandomAccessFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - const char *S2FileSystem::Name() const { + const char *S2FileSystem::Name() const + { return "S2FileSytem"; } @@ -97,21 +590,45 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + MYFS_CreateFile(this->FileSystemObj, cpath); + else + ptr->FileSize = 0; + + result->reset(); + result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::ReopenWritableFile(const std::string &fname, const FileOptions &, std::unique_ptr *result, + IODebugContext *) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(); + + result->reset(); + result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } IOStatus S2FileSystem::NewRandomRWFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { + IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) { + IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) + { return IOStatus::IOError(__FUNCTION__); } @@ -124,66 +641,112 @@ namespace ROCKSDB_NAMESPACE { // returns non-OK. IOStatus S2FileSystem::NewDirectory(const std::string &name, const IOOptions &io_opts, std::unique_ptr *result, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + + result->reset(); + result->reset(new MYFS_Directory(this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { + IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) { + IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } // Create the specified directory. Returns error if directory exists. - IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(dirname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, cpath); + else + return IOStatus::IOError(__FUNCTION__); + + return IOStatus::OK(); } // Creates directory if missing. Return Ok if it exists, or successful in // Creating. - IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(dirname, cpath); + Inode *ptr; + std::string dir = cpath.substr(0, cpath.size() - 1); + int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dir); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus - S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) + { + + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + else + *file_size = ptr->FileSize; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetFileModificationTime(const std::string &fname, const IOOptions &options, uint64_t *file_mtime, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + //*output_path = db_path; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { + // MYFS_DeletePath(this->FileSystemObj, fname); + std::cout<<"Delete file called"< *result, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) { + IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. - IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) + { + return IOStatus::OK(); } // Lock the specified file. Used to prevent concurrent access to @@ -200,30 +763,69 @@ namespace ROCKSDB_NAMESPACE { // to go away. // // May create the named file if it does not already exist. - IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) + { + return IOStatus::OK(); } IOStatus - S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) { + S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { + IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) { + IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::RenameFile(const std::string &src, const std::string &target, const IOOptions &options, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + std::string cpath_target, cpath_src; + Clean_Path(src, cpath_src); + Clean_Path(target, cpath_target); + Inode *targetptr, *sourceptr; + // MYFS_DeletePath(this->FileSystemObj, target); + // FIXME: Logic for rename + // Change name in Inode + // Change in parent + + // verify if target exists + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); + if (isPresent) + { + // if it is not present + // rename the inode + std::string entityName; + Get_EntityName(cpath_src, entityName); + Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); + LookupMap_Delete(this->FileSystemObj, cpath_src); + + LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); + std::string targetEntityName; + Get_EntityName(cpath_target, targetEntityName); + + std::string parentPath; + Get_ParentPath(cpath_target, parentPath); + int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, targetEntityName); + if (parentUpdated) + return IOStatus::IOError(__FUNCTION__); + } + else + { + } + return IOStatus::OK(); } IOStatus S2FileSystem::GetChildrenFileAttributes(const std::string &dir, const IOOptions &options, - std::vector *result, IODebugContext *dbg) { + std::vector *result, IODebugContext *dbg) + { return FileSystem::GetChildrenFileAttributes(dir, options, result, dbg); } @@ -235,8 +837,19 @@ namespace ROCKSDB_NAMESPACE { // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + std::string cpath; + Get_ParentPath(dir, cpath); + Inode *ptr; + + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + uint32_t err = Load_Children(this->FileSystemObj, ptr, "", result, true); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } // Returns OK if the named file exists. @@ -244,13 +857,341 @@ namespace ROCKSDB_NAMESPACE { // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered - IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { + Inode *ptr; + std::string cpath; + Clean_Path(fname, cpath); + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::NotFound(); + return IOStatus::OK(); } IOStatus S2FileSystem::ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } -} \ No newline at end of file + + int load_nth_indirect_block(MYFS *FSObj, uint32_t n, uint64_t indirect_lba, Indirect_ptr **ptr) + { + for (int i = 0; i < n; i++) + Load_From_NVM(FSObj, (*ptr)->Indirect_ptr_lbas, *ptr, 4096); + } + + int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) + { + uint32_t curr = offset / 4096, end = (offset+size) / 4096; + uint64_t if_dirty_addr; + uint64_t *data_block_lba_ptr, next_indirect_block_addr; + uint32_t no_of_data_block_ptrs; + Indirect_ptr *iptr = NULL; + // Load the direct ptr + if (curr < 480) + { + // In Inode block itself + data_block_lba_ptr = ptr->Direct_data_lbas; + no_of_data_block_ptrs = 480; + next_indirect_block_addr = ptr->Indirect_ptr_lbas; + if_dirty_addr = 4096 + (ptr->Inode_no * INODE_SIZE); + } + else + { + curr -= 480; + int nth_indirect = curr / 510; + //What if ptr->Indirect_ptr_lba + iptr = (Indirect_ptr *)calloc(1, 4096); + if(ptr->Indirect_ptr_lbas == 0) { + ptr->Indirect_ptr_lbas = get_FreeDataBlock(FSObj); + } + + Load_From_NVM(FSObj, ptr->Indirect_ptr_lbas, iptr, 4096); + for (int i = 0; i < nth_indirect; i++) + Load_From_NVM(FSObj, iptr->Indirect_ptr_lbas, iptr, 4096); + + data_block_lba_ptr = iptr->Direct_data_lbas; + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + curr = curr % 510; + if_dirty_addr = iptr->Current_addr; + } + + uint64_t addr; + for (int i = 0; i <= end; i++) + { + addr = *(data_block_lba_ptr + curr); + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + *(data_block_lba_ptr+curr) = addr; + } + addresses->push_back(addr); + curr++; + + if (curr == no_of_data_block_ptrs) + { + if (!next_indirect_block_addr) + { + // If no indirect block ptr, create one and store to mem + next_indirect_block_addr = get_FreeDataBlock(FSObj); + if (iptr == NULL) + { + ptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + + } + else + { + iptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + free(iptr); + } + iptr = (Indirect_ptr *)calloc(1, 4096); + iptr->Current_addr = next_indirect_block_addr; + } + else + { + if (iptr == NULL) + iptr = (Indirect_ptr *)calloc(1, 4096); + + Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); + } + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + data_block_lba_ptr = iptr->Direct_data_lbas; + curr = 0; + } + } + + // Store dirty block to NVM + if (iptr == NULL) + { + // addresses->push_back(); + Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + } + else + { + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + } + + free(iptr); + return 0; + } + + // MYFS_File definition + MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) + { + this->FSObj = FSObj; + Get_Path_Inode(FSObj, filePath, &(this->ptr)); + this->curr_read_offset = 0; + } + + int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) + { + if (ptr->FileSize < offset + size) { + if(offset >= ptr->FileSize) + return 0; + size = ptr->FileSize - offset; + } + + std::vector addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) + return -1; + + char *readD = (char *)calloc(addresses_to_read.size(), 4096); + for (int i = 0; i < addresses_to_read.size(); i++) + Load_From_NVM(this->FSObj, addresses_to_read.at(i), readD + (i * 4096), 4096); + + int smargin = offset % 4096; + memcpy(data, readD + smargin, size); + free(readD); + return size; + } + + int MYFS_File::Read(uint64_t size, char *data) + { + // Check with file size + int sizeW = this->PRead(this->curr_read_offset, size, data); + this->curr_read_offset += sizeW; + return sizeW; + } + + int MYFS_File::Seek(uint64_t offset) + { + if (ptr->FileSize < this->curr_read_offset + offset) + return -1; + this->curr_read_offset += offset; + return 0; + } + + int MYFS_File::Truncate(uint64_t size) + { + // TODO: Free Data Block + this->ptr->FileSize = size; + return 0; + } + + int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) + { + std::vector addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) + return -1; + + // Do read-modify-update cycle if smargin is present on 1st address. + int smargin = offset % 4096; + char *buffer = (char *)calloc(addresses_to_read.size(), 4096); + if (smargin) + Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); + + memcpy(buffer + smargin, data, size); + for (int i = 0; i < addresses_to_read.size(); i++) + Store_To_NVM(this->FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); + + // Update file size + this->ptr->FileSize = offset + size; + free(buffer); + return 0; + } + + int MYFS_File::Append(uint64_t size, char *data) + { + return this->PAppend(this->ptr->FileSize, size, data); + } + + uint64_t MYFS_File::GetFileSize() + { + return this->ptr->FileSize; + } + + int MYFS_File::Close() + { + // Flush Inode changes to Disk + } + + // Def of MYFS_SequentialFile + MYFS_SequentialFile::MYFS_SequentialFile(std::string fpath, MYFS *FSObj) + { + this->fp = new MYFS_File(fpath, FSObj); + } + + IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) + { + + int sizeW = this->fp->Read(n, scratch); + *result = Slice(scratch, sizeW); + return IOStatus::OK(); + } + + // IOStatus MYFS_SequentialFile::PositionedRead(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, + // char *scratch, IODebugContext *dbg) + // { + // int err = this->fp->PRead(offset, n, scratch); + // if (err) + // return IOStatus::IOError(__FUNCTION__); + // *result = Slice(scratch, n); + // return IOStatus::OK(); + // } + + IOStatus MYFS_SequentialFile::Skip(uint64_t n) + { + int err = this->fp->Seek(n); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + // Def MYFS_RandomAccessFile + MYFS_RandomAccessFile::MYFS_RandomAccessFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + } + + IOStatus MYFS_RandomAccessFile::Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, + IODebugContext *dbg) const + { + int sizeW = this->fp->PRead(offset, n, scratch); + *result = Slice(scratch, sizeW); + return IOStatus::OK(); + } + + // Def MYFS_WritableFile + MYFS_WritableFile::MYFS_WritableFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + this->cache = false; + this->cacheSize = 0; + } + + IOStatus MYFS_WritableFile::Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) + { + int err = this->fp->Truncate(size); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + IOStatus MYFS_WritableFile::ClearCache() { + if(!this->cache) + return IOStatus::OK(); + this->cache = false; + int err = this->fp->Append(this->cacheSize, this->cacheData); + if (err) + return IOStatus::IOError(__FUNCTION__); + free(this->cacheData); + this->cacheSize = 0; + return IOStatus::OK(); + } + + IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) + { + + char *block = (char *)data.data(); + uint64_t size = data.size(); + if(this->cache) { + //Append to cache + char *tmp = (char *)calloc(1, this->cacheSize+size); + memcpy(tmp, this->cacheData, this->cacheSize); + memcpy(tmp+this->cacheSize, block, size); + free(this->cacheData); + this->cacheData = tmp; + this->cacheSize += size; + //If size > 4096 clear cache + if(this->cacheSize >= 4096*200) + this->ClearCache(); + return IOStatus::OK(); + } else if(size < 4096*200) { + //Append to cache + this->cache = true; + this->cacheData = (char *)calloc(1, size); + memcpy(this->cacheData, block, size); + this->cacheSize = size; + return IOStatus::OK(); + } + int err = this->fp->Append(size, block); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + // MYFS_Directory::MYFS_Directory(std::string name) { + // std::cout<<"For checl"<fp->PAppend(offset, size, block); + std::cout<<"PAppend size : "< #include -namespace ROCKSDB_NAMESPACE { +#define LOOKUP_MAP_SIZE 1000 +#define MAX_INODE_COUNT 255 +#define INODE_SIZE 4096 +#define SUPER_BLOCK_SIZE 4096*2 +#define STRINGENCODE 31 +#define DATA_BLOCKS_OFFSET 256 +namespace ROCKSDB_NAMESPACE +{ + struct SuperBlock + { + bool persistent; + uint64_t inodeBlockPtr; + uint64_t dataBlockPtr; + }; + + struct Inode + { + uint32_t Inode_no; + char EntityName[235]; + bool IsDir; + uint64_t FileSize; + uint64_t Indirect_ptr_lbas; + uint64_t Direct_data_lbas[480]; + }; + + struct mapEntries + { + char id[1000]; + Inode *ptr; + mapEntries *chain; + }; + + struct Indirect_ptr + { + uint64_t Current_addr; + uint64_t Direct_data_lbas[510]; + uint64_t Indirect_ptr_lbas; + }; + + struct MYFS_DirData + { + char EntityName[252]; + uint32_t InodeNum; + }; + + struct MYFS_Dir + { + MYFS_DirData Entities[16]; + }; + + struct MYFS + { + mapEntries *LookupCache[LOOKUP_MAP_SIZE]; // Map type to void ptrs; + bool InodeBitMap[MAX_INODE_COUNT]; + bool *DataBitMap; + uint32_t InodePtr; + + uint64_t DataBlockPtr; + uint64_t DataBlockMax; + + uint64_t DataBlockCount; + uint64_t FileSystemCapacity; + uint32_t LogicalBlockSize; + Inode *rootEntry; + user_zns_device *zns; + }; + + /* + int Load_From_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); + int Store_To_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); + void Get_ParentPath(std::string path, std::string &parent); + void Get_EntityName(std::string path, std::string &entityName); + //void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); + // int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); + int LookupMap_HashFunction(void *data); + */ + + + class MYFS_File + { + private: + struct Inode *ptr; + MYFS *FSObj; + uint64_t curr_read_offset; + void *current_ptr; - class S2FileSystem : public FileSystem { + public: + MYFS_File(std::string filePath, MYFS *FSObj); + virtual ~MYFS_File() = default; + int Read(uint64_t size, char *data); + int PRead(uint64_t offset, uint64_t size, char *data); + int Seek(uint64_t offset); + int Truncate(uint64_t size); + int Append(uint64_t size, char *data); + int PAppend(uint64_t offset, uint64_t size, char *data); + uint64_t GetFileSize(); + int Close(); + }; + + /* + *Creates read only MYFS_File object + */ + class MYFS_SequentialFile : public FSSequentialFile + { + private: + MYFS_File *fp; + + public: + MYFS_SequentialFile(std::string filePath, MYFS *FSObj); + virtual ~MYFS_SequentialFile(){delete this->fp;} + virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, + char *scratch, IODebugContext *dbg)override; + + virtual IOStatus Skip(uint64_t n) override; + // virtual IOStatus PositionedRead(uint64_t offset, size_t n, + // const IOOptions &opts, Slice *result, + // char *scratch, IODebugContext *dbg) override; + // virtual IOStatus InvalidateCache(size_t offset, size_t length) override + // { + // return IOStatus::OK(); + // }; + // virtual bool use_direct_io() const override { return true; } + // virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + }; + + class MYFS_RandomAccessFile : public FSRandomAccessFile + { + private: + MYFS_File *fp; + + public: + MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); + virtual ~MYFS_RandomAccessFile(){delete this->fp;} + virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, + Slice *result, char *scratch, IODebugContext *dbg) const override; + /* + virtual IOStatus MultiRead(FSReadRequest *reqs, size_t num_reqs, + const IOOptions &options, + IODebugContext *dbg) {std::cout<<"MULTIREAD"<ClearCache();delete this->fp;} + virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) {return IOStatus::OK();}; + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + /* + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + const DataVerificationInfo & /* verification_info , + IODebugContext *dbg) override + { + return Append(data, opts, dbg); + } + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, const DataVerificationInfo & /* verification_info, + IODebugContext *dbg) override + { + return PositionedAppend(data, offset, opts, dbg); + } + + virtual IOStatus Fsync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual bool IsSyncThreadSafe() const { return false; } + virtual bool use_direct_io() const override { return true; } + virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {} + virtual uint64_t GetFileSize(const IOOptions &opts, + IODebugContext *dbg) override {std::cout<<"Calling this module"<fp->GetFileSize();} + virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); } + virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + */ + }; + + class MYFS_Directory : public FSDirectory + { + private: + MYFS *fp; + public: + MYFS_Directory(MYFS *FSObj){} + virtual ~MYFS_Directory(){} + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { + return IOStatus::OK(); + } + }; + + class S2FileSystem : public FileSystem + { public: // No copying allowed S2FileSystem(std::string uri, bool debug); - S2FileSystem(const S2FileSystem&) = delete; + S2FileSystem(const S2FileSystem &) = delete; virtual ~S2FileSystem(); IOStatus IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) override; @@ -91,9 +297,9 @@ namespace ROCKSDB_NAMESPACE { GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg); - IOStatus DeleteFile(const std::string& fname, - const IOOptions& options, - IODebugContext* dbg); + IOStatus DeleteFile(const std::string &fname, + const IOOptions &options, + IODebugContext *dbg); IOStatus NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, @@ -131,7 +337,8 @@ namespace ROCKSDB_NAMESPACE { struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; + struct MYFS *FileSystemObj; }; } -#endif //STOSYS_PROJECT_S2FILESYSTEM_H \ No newline at end of file +#endif //STOSYS_PROJECT_S2FILESYSTEM_H diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index c57e028..b4bf276 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(false){ + if(true){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { From f08a0cfb65e075e19a1e2e0dd50dd78df043c209 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 21:53:53 +0000 Subject: [PATCH 091/101] change to always rest false --- src/m23-ftl/zns_device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 84cc75a..559dd39 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -214,7 +214,7 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->logical_blocks = (logical_block *)calloc(info->num_data_zones, sizeof(logical_block)); info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; - if (params->force_reset) { + if (params->force_reset || !blocks_info_size) { // reset device ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, NVME_ZNS_ZSA_RESET, 0U, NULL); @@ -496,7 +496,7 @@ int deinit_ss_zns_device(user_zns_device *my_dev) ptr += sizeof(zone_info::write_ptr); free(blocks[i].data_zone); } else { - ptr += sizeof(bool) + sizeof(zone_info::saddr) + + ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + sizeof(zone_info::write_ptr); } pthread_mutex_destroy(&blocks[i].lock); From 20ac1a1792221da147a5cf4911763ee893887d8c Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 22:01:29 +0000 Subject: [PATCH 092/101] change to always rest false --- src/m23-ftl/zns_device.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 559dd39..07e343a 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -216,11 +216,13 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; if (params->force_reset || !blocks_info_size) { // reset device - ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, - NVME_ZNS_ZSA_RESET, 0U, NULL); - if (ret) { - printf("Zone reset failed %d\n", ret); - return ret; + if (params->force_reset) { + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return ret; + } } // set logical block for (uint32_t i = 0U; i < info->num_data_zones; ++i) { From aa07ffa64f370f53f7a8de82e9f7b8e9f29fb8dd Mon Sep 17 00:00:00 2001 From: yssamtu Date: Sun, 16 Oct 2022 22:48:40 +0000 Subject: [PATCH 093/101] ftl is ready for persistency but keeping reset option --- src/m23-ftl/zns_device.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 07e343a..e97728e 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -25,7 +25,6 @@ SOFTWARE. #include #include #include -#include #include #include #include "zns_device.h" @@ -209,7 +208,11 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_append_size = info->zasl; // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - std::unordered_set used_zones_index; + // std::unordered_set used_zones_index; + uint8_t *used_zones_index = (uint8_t *) + calloc(((info->num_zones - 1U) >> 3U) + 1U, + sizeof(uint8_t)); + uint32_t num_used_zones = 0U; // set log zone page mapped hashmap size to num_data_zones info->logical_blocks = (logical_block *)calloc(info->num_data_zones, sizeof(logical_block)); @@ -262,8 +265,10 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) pthread_mutex_init(&block->data_zone->num_valid_pages_lock, NULL); pthread_mutex_init(&block->data_zone->write_ptr_lock, NULL); - used_zones_index.emplace(block->data_zone->saddr / - info->zone_num_pages); + write_bitmap(used_zones_index, + block->data_zone->saddr / info->zone_num_pages, + 1U); + ++num_used_zones; } else { ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + sizeof(zone_info::write_ptr); @@ -278,7 +283,7 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); for (uint32_t i = 1U; i < info->num_zones; ++i) { - if (!used_zones_index.count(i)) { + if (!read_bitmap(used_zones_index, i, 1U)) { info->free_zones_tail->next = (zone_info *) calloc(1UL, sizeof(zone_info)); info->free_zones_tail = info->free_zones_tail->next; @@ -289,8 +294,9 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) NULL); } } + free(used_zones_index); // set num_free_zones - info->num_free_zones = info->num_zones - used_zones_index.size(); + info->num_free_zones = info->num_zones - num_used_zones; //Set current log zone to 0th zone info->curr_log_zone = info->free_zones; info->free_zones = info->free_zones->next; From 923c073b48798e7ae6d93b78609cde14876b53f5 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 17 Oct 2022 09:01:23 +0000 Subject: [PATCH 094/101] Init commit with m1 --- src/m45-rocksdb/m45_main.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/m45-rocksdb/m45_main.cc b/src/m45-rocksdb/m45_main.cc index dd88cec..266860c 100644 --- a/src/m45-rocksdb/m45_main.cc +++ b/src/m45-rocksdb/m45_main.cc @@ -48,7 +48,7 @@ static std::string genrate_random_string(const int len) { static int fill_up_map(std::map &testmap, int entries, int ksize, int vsize){ int count = 0; - while(testmap.size() != static_cast(entries)) { + while(testmap.size() != entries) { // the problem is that with small key sizes, we might run out of unique keys to insert, hence // we append the count at the end to make them unique and then dynamically adjust the value size to // control the total bytes of data inserted in the database @@ -83,10 +83,10 @@ static void destroy_myrocks_context(struct MyRocksContext *&ctx){ delete[] ctx; } -// static void print_myrocks_context(struct MyRocksContext *ctx){ -// assert(ctx != nullptr); -// std::cout<<" uri: " << ctx->uri << " fs_attached: " << ctx->db->GetFileSystem()->Name() << " \n"; -// } +static void print_myrocks_context(struct MyRocksContext *ctx){ + assert(ctx != nullptr); + std::cout<<" uri: " << ctx->uri << " fs_attached: " << ctx->db->GetFileSystem()->Name() << " \n"; +} // posix takes: posix://.*" // s2fs-rocksdb takes takes: s2fs:.*://.* From b55941b00601073e84752199cde54ff43245464b Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 17 Oct 2022 11:06:23 +0000 Subject: [PATCH 095/101] small change --- src/m23-ftl/zns_device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index e97728e..084ecad 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -473,6 +473,7 @@ int deinit_ss_zns_device(user_zns_device *my_dev) else info->free_zones = info->curr_log_zone; info->free_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; ++info->num_free_zones; pthread_mutex_unlock(&info->zones_lock); } From 079038c517c81125b16037a9c6403c5634073706 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 17 Oct 2022 11:52:33 +0000 Subject: [PATCH 096/101] small change 2 --- src/m23-ftl/zns_device.cpp | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 084ecad..4d3e0ae 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -208,7 +208,6 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) info->free_append_size = info->zasl; // initialise size_limit_lock pthread_mutex_init(&info->size_limit_lock, NULL); - // std::unordered_set used_zones_index; uint8_t *used_zones_index = (uint8_t *) calloc(((info->num_zones - 1U) >> 3U) + 1U, sizeof(uint8_t)); @@ -257,11 +256,11 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) if (*ptr) { ptr += sizeof(uint8_t); block->data_zone = (zone_info *)calloc(1UL, sizeof(zone_info)); - memcpy(&block->data_zone->saddr, ptr, sizeof(zone_info::saddr)); - ptr += sizeof(zone_info::saddr); - memcpy(&block->data_zone->write_ptr, ptr, - sizeof(zone_info::write_ptr)); - ptr += sizeof(zone_info::write_ptr); + memcpy(&block->data_zone->saddr, ptr, + sizeof(unsigned long long)); + ptr += sizeof(unsigned long long); + memcpy(&block->data_zone->write_ptr, ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); pthread_mutex_init(&block->data_zone->num_valid_pages_lock, NULL); pthread_mutex_init(&block->data_zone->write_ptr_lock, NULL); @@ -270,8 +269,8 @@ int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) 1U); ++num_used_zones; } else { - ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + - sizeof(zone_info::write_ptr); + ptr += sizeof(uint8_t) + sizeof(unsigned long long) + + sizeof(uint32_t); } pthread_mutex_init(&block->lock, NULL); } @@ -480,8 +479,7 @@ int deinit_ss_zns_device(user_zns_device *my_dev) info->run_gc = false; pthread_join(info->gc_thread, NULL); uint64_t block_info_size = info->bitmap_size + sizeof(uint8_t) + - sizeof(zone_info::saddr) + - sizeof(zone_info::write_ptr); + sizeof(unsigned long long) + sizeof(uint32_t); uint64_t append_size = ((info->num_data_zones * block_info_size - 1UL) / info->page_size + 1UL) * info->page_size; uint8_t *blocks_info = (uint8_t *)calloc(1UL, append_size); @@ -498,15 +496,15 @@ int deinit_ss_zns_device(user_zns_device *my_dev) pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); memset(ptr, 1, sizeof(uint8_t)); ptr += sizeof(uint8_t); - memcpy(ptr, &blocks[i].data_zone->saddr, sizeof(zone_info::saddr)); - ptr += sizeof(zone_info::saddr); - memcpy(ptr, &blocks[i].data_zone->write_ptr, - sizeof(zone_info::write_ptr)); - ptr += sizeof(zone_info::write_ptr); + memcpy(ptr, &blocks[i].data_zone->saddr, + sizeof(unsigned long long)); + ptr += sizeof(unsigned long long); + memcpy(ptr, &blocks[i].data_zone->write_ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); free(blocks[i].data_zone); } else { - ptr += sizeof(uint8_t) + sizeof(zone_info::saddr) + - sizeof(zone_info::write_ptr); + ptr += sizeof(uint8_t) + sizeof(unsigned long long) + + sizeof(uint32_t); } pthread_mutex_destroy(&blocks[i].lock); } From 1efb0689a59283d97b46d8161f703d842a2e6d79 Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 17 Oct 2022 13:45:35 +0000 Subject: [PATCH 097/101] bug fixed in update_page_map --- src/m23-ftl/zns_device.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 4d3e0ae..2369fb6 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -615,7 +615,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, unsigned long long physical_addr, uint32_t num_pages) { - while (num_pages--) { + for (; num_pages--; ++page_addr, ++physical_addr) { uint32_t index = get_block_index(page_addr, info->zone_num_pages); logical_block *block = &info->logical_blocks[index]; //Lock for updating page map @@ -627,7 +627,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - return; + continue; } if (block->page_maps->page_addr == page_addr) { //Update log counter @@ -635,7 +635,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, block->page_maps->physical_addr = physical_addr; block->page_maps->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - return; + continue; } if (block->page_maps->page_addr > page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); @@ -645,7 +645,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - return; + continue; } page_map *ptr = block->page_maps; while (ptr->next) { @@ -655,7 +655,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - return; + break; } else if (ptr->next->page_addr > page_addr) { page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); tmp->next = ptr->next; @@ -664,18 +664,18 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, tmp->physical_addr = physical_addr; tmp->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - return; + break; } ptr = ptr->next; } + if (ptr->next) + continue; ptr->next = (page_map *)calloc(1, sizeof(page_map)); block->page_maps_tail = ptr->next; ptr->next->page_addr = page_addr; ptr->next->physical_addr = physical_addr; ptr->next->zone = info->curr_log_zone; pthread_mutex_unlock(&block->lock); - ++page_addr; - ++physical_addr; } } From d282e3c38dc5959cbb773ea3cd53f757cce5f83a Mon Sep 17 00:00:00 2001 From: yssamtu Date: Mon, 17 Oct 2022 14:09:55 +0000 Subject: [PATCH 098/101] latest ftl --- src/m23-ftl/zns_device.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 2369fb6..dfdc597 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -647,9 +647,16 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, pthread_mutex_unlock(&block->lock); continue; } - page_map *ptr = block->page_maps; - while (ptr->next) { - if (ptr->next->page_addr == page_addr) { + for (page_map *ptr = block->page_maps; ; ptr = ptr->next) { + if (!ptr->next) { + ptr->next = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps_tail = ptr->next; + ptr->next->page_addr = page_addr; + ptr->next->physical_addr = physical_addr; + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + break; + } else if (ptr->next->page_addr == page_addr) { //Update log counter decrease_num_valid_page(ptr->next->zone, 1U); ptr->next->physical_addr = physical_addr; @@ -666,16 +673,7 @@ static void update_page_map(zns_info *info, unsigned long long page_addr, pthread_mutex_unlock(&block->lock); break; } - ptr = ptr->next; } - if (ptr->next) - continue; - ptr->next = (page_map *)calloc(1, sizeof(page_map)); - block->page_maps_tail = ptr->next; - ptr->next->page_addr = page_addr; - ptr->next->physical_addr = physical_addr; - ptr->next->zone = info->curr_log_zone; - pthread_mutex_unlock(&block->lock); } } From b9d0bb5f0433bb94d1a50698bf03df40e435b2cc Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 17 Oct 2022 19:54:58 +0000 Subject: [PATCH 099/101] FS persistency --- src/m45-rocksdb/S2FileSystem.cc | 143 ++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 64 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 3afefe3..0891f9f 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -110,7 +110,7 @@ namespace ROCKSDB_NAMESPACE { // Check the size if quantization of LBA int err = zns_udevice_read(FSObj->zns, addr, buffer, size); - std::cout<<"Load nvm err : "<InodeBitMap[ptr]) { FSObj->InodePtr = ptr; + FSObj->InodeBitMap[ptr] = true; return ptr; } ptr = (ptr + 1) % MAX_INODE_COUNT; @@ -143,6 +144,7 @@ namespace ROCKSDB_NAMESPACE if (!FSObj->DataBitMap[ptr]) { FSObj->DataBlockPtr = ptr; + FSObj->DataBitMap[ptr] = true; return (ptr + DATA_BLOCKS_OFFSET) * FSObj->LogicalBlockSize; } ptr = (ptr + 1) % FSObj->DataBlockCount; @@ -150,11 +152,13 @@ namespace ROCKSDB_NAMESPACE return 0; } + /* void free_DataBlock(MYFS *FSObj, uint64_t addr) { int index = (addr / FSObj->LogicalBlockSize) - DATA_BLOCKS_OFFSET; FSObj->DataBitMap[index] = false; } + */ // Trim till /../path in /../path/name void Get_ParentPath(std::string path, std::string &parent) @@ -213,8 +217,10 @@ namespace ROCKSDB_NAMESPACE Load_From_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); for (int j = 0; j < 16; j++) { - if (loadChildren) - children->push_back(dir_ptr->Entities[j].EntityName); + if (loadChildren) { + if(strcmp(dir_ptr->Entities[i].EntityName,"")) + children->push_back(dir_ptr->Entities[i].EntityName); + } else { if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) @@ -240,8 +246,10 @@ namespace ROCKSDB_NAMESPACE Load_From_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); for (int i = 0; i < children_count % 16; i++) { - if (loadChildren) - children->push_back(dir_ptr->Entities[i].EntityName); + if (loadChildren) { + if(strcmp(dir_ptr->Entities[i].EntityName,"")) + children->push_back(dir_ptr->Entities[i].EntityName); + } else { if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) @@ -270,6 +278,7 @@ namespace ROCKSDB_NAMESPACE // Stores the inode ptr as well, returns 0 in success int Get_Path_Inode(MYFS *FSObj, std::string path, Inode **ptr) { + if (path == "/tmp") { *ptr = FSObj->rootEntry; @@ -300,17 +309,21 @@ namespace ROCKSDB_NAMESPACE // Load the children index inode from disk and store in lookupMap; uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; - ptr = (Inode **)calloc(1, sizeof(Inode)); - isPresent = Load_From_NVM(FSObj, address, ptr, (uint64_t)INODE_SIZE); - if (!isPresent) - return -1; + Inode *iptr = (Inode *)calloc(1, sizeof(Inode)); + + Load_From_NVM(FSObj, address, iptr, INODE_SIZE); + std::cout<<"Load File : "<EntityName<<" "<Inode_no<"); + + //Change lookupmap + LookupMap_Delete(FSObj, path); + FSObj->InodeBitMap[ptr->Inode_no] = false; + //Free Data zones + free(ptr); + } + + //For creation int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) { // FIXME: Logic for deletion @@ -355,22 +389,9 @@ namespace ROCKSDB_NAMESPACE return 0; } - /* - void MYFS_DeletePath(MYFS *FSObj, std::string path) - { - Inode *ptr; - int isPresent = Get_Path_Inode(FSObj, path, &ptr); - if (isPresent) - return; - // TODO: Handle logic if dir - // Free data block of inode as well! - // Update Parent - std::string ppath; - Get_ParentPath(path, ppath); - // Delete from lookup map - } - */ + + int MYFS_CreateFile(MYFS *FSObj, std::string path) { uint32_t inode_no = get_FreeInode(FSObj); @@ -505,28 +526,27 @@ namespace ROCKSDB_NAMESPACE tmp = head; head = head->chain; Store_To_NVM(this->FileSystemObj, (tmp->ptr->Inode_no * INODE_SIZE) + SUPER_BLOCK_SIZE, tmp->ptr, INODE_SIZE); + std::cout<<"File : "<ptr->EntityName<<" "<ptr->Inode_no<<" @ "<<(tmp->ptr->Inode_no * INODE_SIZE) + SUPER_BLOCK_SIZE<ptr); free(tmp); } } void *superBlockWBitMap = (void *) calloc(1,SUPER_BLOCK_SIZE); - struct SuperBlock *sb = (SuperBlock *) calloc(1, sizeof(SuperBlock)); + struct SuperBlock *sb = (SuperBlock *) superBlockWBitMap;//calloc(1, sizeof(SuperBlock)); sb->dataBlockPtr = this->FileSystemObj->DataBlockPtr; sb->inodeBlockPtr = this->FileSystemObj->InodePtr; sb->persistent = true; std::cout<<"Inode count : "<FileSystemObj->DataBlockCount<FileSystemObj->InodeBitMap, MAX_INODE_COUNT); memcpy(superBlockWBitMap+sizeof(SuperBlock)+MAX_INODE_COUNT, this->FileSystemObj->DataBitMap, this->FileSystemObj->DataBlockCount); Store_To_NVM(this->FileSystemObj, 0, superBlockWBitMap, SUPER_BLOCK_SIZE); free(superBlockWBitMap); - free(sb); + //free(sb); free(this->FileSystemObj->DataBitMap); - deinit_ss_zns_device(this->FileSystemObj->zns); free(this->FileSystemObj); - } // Create a brand new sequentially-readable file with the specified name. @@ -541,8 +561,8 @@ namespace ROCKSDB_NAMESPACE std::string cpath; Clean_Path(fname, cpath); Inode *ptr; - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); - if (isPresent) + int notPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (notPresent) return IOStatus::IOError(__FUNCTION__); result->reset(); @@ -726,7 +746,6 @@ namespace ROCKSDB_NAMESPACE IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { // MYFS_DeletePath(this->FileSystemObj, fname); - std::cout<<"Delete file called"<FileSystemObj, target); - // FIXME: Logic for rename - // Change name in Inode - // Change in parent + // verify if target exists - int isPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); - if (isPresent) - { - // if it is not present - // rename the inode - std::string entityName; - Get_EntityName(cpath_src, entityName); - Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); - LookupMap_Delete(this->FileSystemObj, cpath_src); - - LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); - std::string targetEntityName; - Get_EntityName(cpath_target, targetEntityName); - - std::string parentPath; - Get_ParentPath(cpath_target, parentPath); - int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, targetEntityName); - if (parentUpdated) - return IOStatus::IOError(__FUNCTION__); - } - else - { - } + int notPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); + if (!notPresent) + //If present + MYFS_DeletePath(this->FileSystemObj, cpath_target); + + // if it is not present + // rename the inode + std::string entityName; + Get_EntityName(cpath_src, entityName); + Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); + LookupMap_Delete(this->FileSystemObj, cpath_src); + + LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); + std::string targetEntityName; + Get_EntityName(cpath_target, targetEntityName); + strcpy(sourceptr->EntityName, targetEntityName.c_str()); + + std::string parentPath; + Get_ParentPath(cpath_target, parentPath); + int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, targetEntityName); + if (parentUpdated) + return IOStatus::IOError(__FUNCTION__); return IOStatus::OK(); } @@ -999,7 +1014,7 @@ namespace ROCKSDB_NAMESPACE std::vector addresses_to_read; int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); if (err) - return -1; + return 0; char *readD = (char *)calloc(addresses_to_read.size(), 4096); for (int i = 0; i < addresses_to_read.size(); i++) @@ -1160,10 +1175,10 @@ namespace ROCKSDB_NAMESPACE this->cacheData = tmp; this->cacheSize += size; //If size > 4096 clear cache - if(this->cacheSize >= 4096*200) + if(this->cacheSize >= 4096) this->ClearCache(); return IOStatus::OK(); - } else if(size < 4096*200) { + } else if(size < 4096) { //Append to cache this->cache = true; this->cacheData = (char *)calloc(1, size); From ac4a4789365fee1da98ed287195b089f30072a06 Mon Sep 17 00:00:00 2001 From: Sudarsan Date: Mon, 17 Oct 2022 21:18:44 +0000 Subject: [PATCH 100/101] [WIP] persistency --- src/m45-rocksdb/S2FileSystem.cc | 95 ++++++++++++--------------------- 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 0891f9f..af2ed32 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -313,8 +313,6 @@ namespace ROCKSDB_NAMESPACE Load_From_NVM(FSObj, address, iptr, INODE_SIZE); std::cout<<"Load File : "<EntityName<<" "<Inode_no<"); - - //Change lookupmap - LookupMap_Delete(FSObj, path); - FSObj->InodeBitMap[ptr->Inode_no] = false; - //Free Data zones - free(ptr); - } + //For creation - int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode, bool del = false) + int Update_Parent(MYFS *FSObj, std::string Ppath, std::string childName, uint32_t childInode) { // FIXME: Logic for deletion Inode *ptr; @@ -367,7 +346,7 @@ namespace ROCKSDB_NAMESPACE MYFS_Dir *dirPtr; dirPtr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); - int index = (++ptr->FileSize) / 16; + int index = (ptr->FileSize) / 16; uint64_t addr = ptr->Direct_data_lbas[index]; if (!addr) @@ -382,15 +361,36 @@ namespace ROCKSDB_NAMESPACE return -1; } - index = ptr->FileSize % 16; - dirPtr->Entities[index - 1] = dirDataptr; + index = (ptr->FileSize) % 16; + std::cout<<"FS : "<FileSize<<" "<Entities[index] = dirDataptr; Store_To_NVM(FSObj, addr, dirPtr, 4096); + ptr->FileSize++; free(dirPtr); return 0; } - + void MYFS_DeletePath(MYFS *FSObj, std::string path) + { + Inode *ptr, *parentInode; + int notPresent = Get_Path_Inode(FSObj, path, &ptr); + if (notPresent) + return; + + //Update parent + std::string entityName, ppath; + Get_EntityName(path, entityName); + Get_ParentPath(path, ppath); + Rename_Child_In_Parent(FSObj, ppath, entityName, ""); + //Get_Path_Inode(FSObj, ppath, &parentInode); + //parentInode->FileSize -=1; + //Change lookupmap + LookupMap_Delete(FSObj, path); + FSObj->InodeBitMap[ptr->Inode_no] = false; + //Free Data zones + free(ptr); + } int MYFS_CreateFile(MYFS *FSObj, std::string path) { @@ -823,13 +823,11 @@ namespace ROCKSDB_NAMESPACE std::string entityName; Get_EntityName(cpath_src, entityName); Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); - LookupMap_Delete(this->FileSystemObj, cpath_src); - LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); + LookupMap_Delete(this->FileSystemObj, cpath_src); std::string targetEntityName; Get_EntityName(cpath_target, targetEntityName); strcpy(sourceptr->EntityName, targetEntityName.c_str()); - std::string parentPath; Get_ParentPath(cpath_target, parentPath); int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, targetEntityName); @@ -898,11 +896,11 @@ namespace ROCKSDB_NAMESPACE int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) { - uint32_t curr = offset / 4096, end = (offset+size) / 4096; - uint64_t if_dirty_addr; uint64_t *data_block_lba_ptr, next_indirect_block_addr; - uint32_t no_of_data_block_ptrs; Indirect_ptr *iptr = NULL; + uint32_t curr = offset / 4096, end = (offset+size) / 4096; + uint32_t no_of_data_block_ptrs; + // Load the direct ptr if (curr < 480) { @@ -910,7 +908,6 @@ namespace ROCKSDB_NAMESPACE data_block_lba_ptr = ptr->Direct_data_lbas; no_of_data_block_ptrs = 480; next_indirect_block_addr = ptr->Indirect_ptr_lbas; - if_dirty_addr = 4096 + (ptr->Inode_no * INODE_SIZE); } else { @@ -930,7 +927,6 @@ namespace ROCKSDB_NAMESPACE next_indirect_block_addr = iptr->Indirect_ptr_lbas; no_of_data_block_ptrs = 510; curr = curr % 510; - if_dirty_addr = iptr->Current_addr; } uint64_t addr; @@ -954,7 +950,7 @@ namespace ROCKSDB_NAMESPACE if (iptr == NULL) { ptr->Indirect_ptr_lbas = next_indirect_block_addr; - Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + Store_To_NVM(FSObj, SUPER_BLOCK_SIZE + (ptr->Inode_no * INODE_SIZE), ptr, 4096); } else @@ -982,14 +978,9 @@ namespace ROCKSDB_NAMESPACE // Store dirty block to NVM if (iptr == NULL) - { - // addresses->push_back(); - Store_To_NVM(FSObj, 4096 + (ptr->Inode_no * INODE_SIZE), ptr, 4096); - } + Store_To_NVM(FSObj, SUPER_BLOCK_SIZE + (ptr->Inode_no * INODE_SIZE), ptr, 4096); else - { Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); - } free(iptr); return 0; @@ -1152,11 +1143,11 @@ namespace ROCKSDB_NAMESPACE IOStatus MYFS_WritableFile::ClearCache() { if(!this->cache) return IOStatus::OK(); - this->cache = false; int err = this->fp->Append(this->cacheSize, this->cacheData); if (err) return IOStatus::IOError(__FUNCTION__); free(this->cacheData); + this->cache = false; this->cacheSize = 0; return IOStatus::OK(); } @@ -1191,22 +1182,4 @@ namespace ROCKSDB_NAMESPACE return IOStatus::IOError(__FUNCTION__); return IOStatus::OK(); } - - // MYFS_Directory::MYFS_Directory(std::string name) { - // std::cout<<"For checl"<fp->PAppend(offset, size, block); - std::cout<<"PAppend size : "< Date: Mon, 17 Oct 2022 21:32:10 +0000 Subject: [PATCH 101/101] Bug fix --- src/m45-rocksdb/S2FileSystem.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index af2ed32..a6e64dc 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -996,12 +996,13 @@ namespace ROCKSDB_NAMESPACE int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) { + if (ptr->FileSize < offset + size) { if(offset >= ptr->FileSize) return 0; size = ptr->FileSize - offset; } - + std::cout<<"Read : "<ptr->EntityName<<" "< addresses_to_read; int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); if (err) @@ -1055,7 +1056,7 @@ namespace ROCKSDB_NAMESPACE memcpy(buffer + smargin, data, size); for (int i = 0; i < addresses_to_read.size(); i++) - Store_To_NVM(this->FSObj, addresses_to_read.at(i), data + (i * 4096), 4096); + Store_To_NVM(this->FSObj, addresses_to_read.at(i), buffer + (i * 4096), 4096); // Update file size this->ptr->FileSize = offset + size;